Hi folks!
Here's an interesting exercise for you:
cd xc/programs/Xserver/hw/xfree86/drivers
grep -i mungeddata */*.c
Scary, huh?
All of those functions do the same thing, modulo bugs and performance.
And there are a few more of them, with different names. (grep -i copyYV)
Now, this function is smack in the middle of the critical path for
pretty much any Xv app, so performance matters here. And most of these
functions have a lot of room for improvement. The code is a nice
candidate for the various MMX/SSE/MVI/whatever extensions different
arches have. But it helps to have it in a central place then. So...
Attached is a patch that
a) Adds new files xc/programs/Xserver/hw/xfree86/common/xvconv.[ch]
which contain working code for this. (from the nv driver, originally)
b) Renames the function from <drivername>CopyMungedData to YV12toUYVY,
since that's what it does.
c) Makes the Imakefile in common respect the BuildXvExt and BuildXvMCExt
defines.
d) Removes the private versions of the code from a couple of drivers
(radeon, mga, s3, nv) and makes them use the central one.
Now, while I'm pretty sure it does fix real bugs (anyone with a radeon
in a big-endian machine feel like trying an Xv program?), I consider this
mostly a cleanup (the possible bugs are obviously pretty obscure, since
they haven't been found yet), and it can certainly wait until after 4.3.
I intend to follow up with some faster code, and maybe patches to
make more drivers use this (unless other driver maintainers do it
first) later.
I'm not sure about the naming (the new files, and the function. I
considered xf86XVYV12toUYVY, but that was just too scary...)
It's not clear to me if the functions called xf86* are supposed to
be some kind of interface or not. I also thought about putting it in the
common/xf86xv.c file. Comments?
/August. Oh yeah, and it makes a non DoLoadableServer server a bit
smaller too. :)
--
Björn Augustsson DCE/DFS Sysadmin IT Systems & Services
Chalmers tekniska högskola Chalmers University of Technology
"Damn spooky analog crap." -- John Carmack.
Index: programs/Xserver/hw/xfree86/common/Imakefile
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/common/Imakefile,v
retrieving revision 3.148
diff -u -r3.148 Imakefile
--- programs/Xserver/hw/xfree86/common/Imakefile 2003/02/17 17:06:41 3.148
+++ programs/Xserver/hw/xfree86/common/Imakefile 2003/02/24 01:26:24
@@ -50,11 +50,21 @@
#endif
#if BuildRandR
- RANDRINCS = -I../../../randr
+ RANDRINCS = -I../../../randr
RANDRSRC = xf86RandR.c
RANDROBJ = xf86RandR.o
#endif
+#if BuildXvExt
+ XVSRC = xf86xv.c xvconv.c
+ XVOBJ = xf86xv.o xvconv.o
+#endif
+
+#if BuildXvMCExt
+ XVMCSRC = xf86xvmc.c
+ XVMCOBJ = xf86xvmc.o
+#endif
+
MODPATHDEFINES = -DDEFAULT_MODULE_PATH=\"$(MODULEDIR)\"
LOGDEFINES = -DDEFAULT_LOGPREFIX=\"$(LOGDIRECTORY)/XLogFile.\"
@@ -112,8 +122,6 @@
xf86Option.c \
xf86VidMode.c \
xf86fbman.c \
- xf86xv.c \
- xf86xvmc.c \
xf86cmap.c\
xf86PM.c \
$(DEBUGSRC) \
@@ -124,7 +132,9 @@
$(XKBDDXSRC) \
$(BETASRC) \
$(SERVERSRCS) \
- $(RANDRSRC)
+ $(RANDRSRC) \
+ $(XVSRC) \
+ $(XVMCSRC)
OBJS = \
xf86Configure.o \
@@ -149,8 +159,6 @@
xf86Option.o \
xf86VidMode.o \
xf86fbman.o \
- xf86xv.o \
- xf86xvmc.o \
xf86cmap.o\
xf86PM.o \
$(DEBUGOBJ) \
@@ -159,7 +167,9 @@
$(XKBDDXOBJ) \
$(BETAOBJ) \
$(KBD).o \
- $(RANDROBJ)
+ $(RANDROBJ) \
+ $(XVOBJ) \
+ $(XVMCOBJ)
OFILES = \
xf86Init.o \
Index: programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c,v
retrieving revision 1.24
diff -u -r1.24 radeon_video.c
--- programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c 2003/02/19 01:19:43 1.24
+++ programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c 2003/02/24 01:26:25
@@ -10,6 +10,7 @@
#include "Xv.h"
#include "fourcc.h"
+#include "xvconv.h"
#define OFF_DELAY 250 /* milliseconds */
#define FREE_DELAY 15000
@@ -869,52 +870,6 @@
}
}
-static void
-RADEONCopyMungedData(
- unsigned char *src1,
- unsigned char *src2,
- unsigned char *src3,
- unsigned char *dst1,
- int srcPitch,
- int srcPitch2,
- int dstPitch,
- int h,
- int w
-){
- CARD32 *dst;
- CARD8 *s1, *s2, *s3;
- int i, j;
-
- w >>= 1;
-
- for(j = 0; j < h; j++) {
- dst = (pointer)dst1;
- s1 = src1; s2 = src2; s3 = src3;
- i = w;
- while(i > 4) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
- dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
- dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
- dst += 4; s2 += 4; s3 += 4; s1 += 8;
- i -= 4;
- }
- while(i--) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst++; s2++; s3++;
- s1 += 2;
- }
-
- dst1 += dstPitch;
- src1 += srcPitch;
- if(j & 1) {
- src2 += srcPitch2;
- src3 += srcPitch2;
- }
- }
-}
-
-
static FBLinearPtr
RADEONAllocateMemory(
ScrnInfoPtr pScrn,
@@ -1247,9 +1202,9 @@
OUTREG(RADEON_SURFACE_CNTL, (surface_cntl | RADEON_NONSURF_AP0_SWP_32BPP)
& ~RADEON_NONSURF_AP0_SWP_16BPP);
#endif
- RADEONCopyMungedData(buf + (top * srcPitch) + left, buf + s2offset,
- buf + s3offset, dst_start, srcPitch, srcPitch2,
- dstPitch, nlines, npixels);
+ YV12toUYVY(buf + (top * srcPitch) + left, buf + s2offset,
+ buf + s3offset, dst_start, srcPitch, srcPitch2,
+ dstPitch, nlines, npixels);
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
Index: programs/Xserver/hw/xfree86/drivers/mga/mga_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/mga/mga_video.c,v
retrieving revision 1.29
diff -u -r1.29 mga_video.c
--- programs/Xserver/hw/xfree86/drivers/mga/mga_video.c 2001/12/26 14:54:04 1.29
+++ programs/Xserver/hw/xfree86/drivers/mga/mga_video.c 2003/02/24 01:26:25
@@ -20,6 +20,7 @@
#include "xaalocal.h"
#include "dixstruct.h"
#include "fourcc.h"
+#include "xvconv.h"
#define OFF_DELAY 250 /* milliseconds */
#define FREE_DELAY 15000
@@ -591,53 +592,6 @@
}
}
-static void
-MGACopyMungedData(
- unsigned char *src1,
- unsigned char *src2,
- unsigned char *src3,
- unsigned char *dst1,
- int srcPitch,
- int srcPitch2,
- int dstPitch,
- int h,
- int w
-){
- CARD32 *dst;
- CARD8 *s1, *s2, *s3;
- int i, j;
-
- w >>= 1;
-
- for(j = 0; j < h; j++) {
- dst = (CARD32*)dst1;
- s1 = src1; s2 = src2; s3 = src3;
- i = w;
- while(i > 4) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
- dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
- dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
- dst += 4; s2 += 4; s3 += 4; s1 += 8;
- i -= 4;
- }
-
- while(i--) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst++; s2++; s3++;
- s1 += 2;
- }
-
- dst1 += dstPitch;
- src1 += srcPitch;
- if(j & 1) {
- src2 += srcPitch2;
- src3 += srcPitch2;
- }
- }
-}
-
-
static FBLinearPtr
MGAAllocateMemory(
ScrnInfoPtr pScrn,
@@ -939,9 +893,9 @@
offset3 = tmp;
}
nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
- MGACopyMungedData(buf + (top * srcPitch) + (left >> 1),
- buf + offset2, buf + offset3, dst_start,
- srcPitch, srcPitch2, dstPitch, nlines, npixels);
+ YV12toUYVY(buf + (top * srcPitch) + (left >> 1),
+ buf + offset2, buf + offset3, dst_start,
+ srcPitch, srcPitch2, dstPitch, nlines, npixels);
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
Index: programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c,v
retrieving revision 1.4
diff -u -r1.4 neo_video.c
--- programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c 2002/11/25 14:05:00 1.4
+++ programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c 2003/02/24 01:26:26
@@ -39,6 +39,7 @@
#include "dixstruct.h"
#include "xaa.h"
#include "xaalocal.h"
+#include "xvconv.h"
static XF86VideoAdaptorPtr NEOSetupVideo(ScreenPtr);
@@ -706,7 +707,7 @@
offset2 += tmp;
offset3 += tmp;
nLines = ((((y2 + 0xFFFF) >> 16) + 1) & ~1) - top;
- NEOCopyYV12Data(buf + (top * srcPitch) + (left >> 1), buf + offset2,
+ YV12toYUV2(buf + (top * srcPitch) + (left >> 1), buf + offset2,
buf + offset3, dstStart, srcPitch, srcPitch2,
dstPitch, nLines, nPixels);
break;
@@ -1026,37 +1027,6 @@
memcpy(dst, src, width);
src += srcPitch;
dst += dstPitch;
- }
-}
-
-static void
-NEOCopyYV12Data(unsigned char *src1, unsigned char *src2,
- unsigned char *src3, unsigned char *dst,
- int srcPitch1, int srcPitch2, int dstPitch,
- int height, int width)
-{
- CARD32 *pDst = (CARD32 *) dst;
- int i;
-
- width >>= 1;
- height >>= 1;
- dstPitch >>= 2;
- while (--height >= 0){
- for (i =0; i < width; i++){
- pDst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
- (src3[i] << 8) | (src2[i] << 24);
- }
- pDst += dstPitch;
- src1 += srcPitch1;
-
- for (i =0; i < width; i++){
- pDst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
- (src3[i] << 8) | (src2[i] << 24);
- }
- pDst += dstPitch;
- src1 += srcPitch1;
- src2 += srcPitch2;
- src3 += srcPitch2;
}
}
Index: programs/Xserver/hw/xfree86/drivers/nv/nv_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/nv/nv_video.c,v
retrieving revision 1.11
diff -u -r1.11 nv_video.c
--- programs/Xserver/hw/xfree86/drivers/nv/nv_video.c 2002/11/26 23:41:59 1.11
+++ programs/Xserver/hw/xfree86/drivers/nv/nv_video.c 2003/02/24 01:26:26
@@ -16,6 +16,7 @@
#include "xaalocal.h"
#include "dixstruct.h"
#include "fourcc.h"
+#include "xvconv.h"
#include "nv_include.h"
@@ -634,66 +635,7 @@
dst += dstPitch;
}
}
-/*
- * CopyMungedData
- */
-static void NVCopyData420
-(
- unsigned char *src1,
- unsigned char *src2,
- unsigned char *src3,
- unsigned char *dst1,
- int srcPitch,
- int srcPitch2,
- int dstPitch,
- int h,
- int w
-)
-{
- CARD32 *dst;
- CARD8 *s1, *s2, *s3;
- int i, j;
-
- w >>= 1;
-
- for(j = 0; j < h; j++) {
- dst = (CARD32*)dst1;
- s1 = src1; s2 = src2; s3 = src3;
- i = w;
- while(i > 4) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- dst[0] = (s1[0] << 24) | (s1[1] << 8) | (s3[0] << 16) | s2[0];
- dst[1] = (s1[2] << 24) | (s1[3] << 8) | (s3[1] << 16) | s2[1];
- dst[2] = (s1[4] << 24) | (s1[5] << 8) | (s3[2] << 16) | s2[2];
- dst[3] = (s1[6] << 24) | (s1[7] << 8) | (s3[3] << 16) | s2[3];
-#else
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
- dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
- dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
-#endif
- dst += 4; s2 += 4; s3 += 4; s1 += 8;
- i -= 4;
- }
- while(i--) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- dst[0] = (s1[0] << 24) | (s1[1] << 8) | (s3[0] << 16) | s2[0];
-#else
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-#endif
- dst++; s2++; s3++;
- s1 += 2;
- }
-
- dst1 += dstPitch;
- src1 += srcPitch;
- if(j & 1) {
- src2 += srcPitch2;
- src3 += srcPitch2;
- }
- }
-}
/*
* PutImage
*/
@@ -842,7 +784,7 @@
s3offset = tmp;
}
nlines = ((((yb + 0xffff) >> 16) + 1) & ~1) - top;
- NVCopyData420(buf + (top * srcPitch) + left, buf + s2offset,
+ YV12toUYVY(buf + (top * srcPitch) + left, buf + s2offset,
buf + s3offset, dst_start, srcPitch, srcPitch2,
dstPitch, nlines, npixels);
break;
Index: programs/Xserver/hw/xfree86/drivers/s3/s3_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/s3/s3_video.c,v
retrieving revision 1.2
diff -u -r1.2 s3_video.c
--- programs/Xserver/hw/xfree86/drivers/s3/s3_video.c 2001/08/15 11:54:27 1.2
+++ programs/Xserver/hw/xfree86/drivers/s3/s3_video.c 2003/02/24 01:26:27
@@ -31,6 +31,7 @@
#include "xf86_ansic.h"
#include "compiler.h"
+#include "xvconv.h"
#include "s3.h"
#include "s3_reg.h"
@@ -180,33 +181,6 @@
}
-static void S3CopyMungedData(unsigned char *src1, unsigned char *src2,
- unsigned char *src3, unsigned char *dst1,
- int srcPitch, int srcPitch2, int dstPitch,
- int h, int w)
-{
- CARD32 *dst = (CARD32*)dst1;
- int i, j;
-
- dstPitch >>= 2;
- w >>= 1;
-
- for(j = 0; j < h; j++) {
- for(i = 0; i < w; i++) {
- dst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
- (src3[i] << 8) | (src2[i] << 24);
- }
- dst += dstPitch;
- src1 += srcPitch;
- if(j & 1) {
- src2 += srcPitch2;
- src3 += srcPitch2;
- }
- }
-}
-
-
-
static void S3ResetVideoOverlay(ScrnInfoPtr pScrn)
{
}
@@ -577,7 +551,7 @@
offset3 = tmp;
}
nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
- S3CopyMungedData(buf + (top * srcPitch) + (left >> 1),
+ YV12toUYVY(buf + (top * srcPitch) + (left >> 1),
buf + offset2, buf + offset3, dst_start,
srcPitch, srcPitch2, dstPitch, nlines, npixels);
once2 = 0;