Hi folks!

Here's an interesting exercise for you:

cd xc/programs/Xserver/hw/xfree86/drivers
grep -i mungeddata */*.c

Scary, huh?

All of those functions do the same thing, modulo bugs and performance.
And there are a few more of them, with different names. (grep -i copyYV)

Now, this function is smack in the middle of the critical path for
pretty much any Xv app, so performance matters here. And most of these
functions have a lot of room for improvement. The code is a nice
candidate for the various MMX/SSE/MVI/whatever extensions different
arches have. But it helps to have it in a central place then. So...

Attached is a patch that
a) Adds new files xc/programs/Xserver/hw/xfree86/common/xvconv.[ch]
   which contain working code for this. (from the nv driver, originally)

b) Renames the function from <drivername>CopyMungedData to YV12toUYVY,
   since that's what it does.

c) Makes the Imakefile in common respect the BuildXvExt and BuildXvMCExt
   defines.

d) Removes the private versions of the code from a couple of drivers
   (radeon, mga, s3, nv) and makes them use the central one.

Now, while I'm pretty sure it does fix real bugs (anyone with a radeon
in a big-endian machine feel like trying an Xv program?), I consider this
mostly a cleanup (the possible bugs are obviously pretty obscure, since
they haven't been found yet), and it can certainly wait until after 4.3.

I intend to follow up with some faster code, and maybe patches to
make more drivers use this (unless other driver maintainers do it
first) later.

I'm not sure about the naming (of the new files, and of the function). I
considered xf86XVYV12toUYVY, but that was just too scary...
It's not clear to me if the functions called xf86* are supposed to
be some kind of interface or not. I also thought about putting it in the
common/xf86xv.c file. Comments?

/August. Oh yeah, and it makes a non-DoLoadableServer server a bit
         smaller too. :)
-- 
Björn Augustsson      DCE/DFS Sysadmin     IT Systems & Services
Chalmers tekniska högskola     Chalmers University of Technology
         "Damn spooky analog crap." -- John Carmack.
Index: programs/Xserver/hw/xfree86/common/Imakefile
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/common/Imakefile,v
retrieving revision 3.148
diff -u -r3.148 Imakefile
--- programs/Xserver/hw/xfree86/common/Imakefile        2003/02/17 17:06:41     3.148
+++ programs/Xserver/hw/xfree86/common/Imakefile        2003/02/24 01:26:24
@@ -50,11 +50,21 @@
 #endif
 
 #if BuildRandR
- RANDRINCS = -I../../../randr
+  RANDRINCS = -I../../../randr
   RANDRSRC = xf86RandR.c
   RANDROBJ = xf86RandR.o
 #endif
 
+#if BuildXvExt
+    XVSRC = xf86xv.c xvconv.c
+    XVOBJ = xf86xv.o xvconv.o
+#endif
+
+#if BuildXvMCExt
+    XVMCSRC = xf86xvmc.c
+    XVMCOBJ = xf86xvmc.o
+#endif
+
 MODPATHDEFINES = -DDEFAULT_MODULE_PATH=\"$(MODULEDIR)\"
 LOGDEFINES = -DDEFAULT_LOGPREFIX=\"$(LOGDIRECTORY)/XLogFile.\"
 
@@ -112,8 +122,6 @@
        xf86Option.c \
        xf86VidMode.c \
        xf86fbman.c \
-       xf86xv.c \
-       xf86xvmc.c \
        xf86cmap.c\
        xf86PM.c \
        $(DEBUGSRC) \
@@ -124,7 +132,9 @@
        $(XKBDDXSRC) \
        $(BETASRC) \
        $(SERVERSRCS) \
-        $(RANDRSRC)
+        $(RANDRSRC) \
+       $(XVSRC) \
+       $(XVMCSRC)
 
 OBJS = \
        xf86Configure.o \
@@ -149,8 +159,6 @@
        xf86Option.o \
        xf86VidMode.o \
        xf86fbman.o \
-       xf86xv.o \
-       xf86xvmc.o \
        xf86cmap.o\
        xf86PM.o \
        $(DEBUGOBJ) \
@@ -159,7 +167,9 @@
        $(XKBDDXOBJ) \
        $(BETAOBJ) \
        $(KBD).o \
-        $(RANDROBJ)
+        $(RANDROBJ) \
+       $(XVOBJ) \
+       $(XVMCOBJ)
 
 OFILES = \
        xf86Init.o \
Index: programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c,v
retrieving revision 1.24
diff -u -r1.24 radeon_video.c
--- programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c      2003/02/19 01:19:43     1.24
+++ programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c      2003/02/24 01:26:25
@@ -10,6 +10,7 @@
 
 #include "Xv.h"
 #include "fourcc.h"
+#include "xvconv.h"
 
 #define OFF_DELAY       250  /* milliseconds */
 #define FREE_DELAY      15000
@@ -869,52 +870,6 @@
     }
 }
 
-static void
-RADEONCopyMungedData(
-   unsigned char *src1,
-   unsigned char *src2,
-   unsigned char *src3,
-   unsigned char *dst1,
-   int srcPitch,
-   int srcPitch2,
-   int dstPitch,
-   int h,
-   int w
-){
-   CARD32 *dst;
-   CARD8 *s1, *s2, *s3;
-   int i, j;
-
-   w >>= 1;
-
-   for(j = 0; j < h; j++) {
-       dst = (pointer)dst1;
-       s1 = src1;  s2 = src2;  s3 = src3;
-       i = w;
-       while(i > 4) {
-          dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-          dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
-          dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
-          dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
-          dst += 4; s2 += 4; s3 += 4; s1 += 8;
-          i -= 4;
-       }
-       while(i--) {
-          dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-          dst++; s2++; s3++;
-          s1 += 2;
-       }
-
-       dst1 += dstPitch;
-       src1 += srcPitch;
-       if(j & 1) {
-           src2 += srcPitch2;
-           src3 += srcPitch2;
-       }
-   }
-}
-
-
 static FBLinearPtr
 RADEONAllocateMemory(
    ScrnInfoPtr pScrn,
@@ -1247,9 +1202,9 @@
        OUTREG(RADEON_SURFACE_CNTL, (surface_cntl | RADEON_NONSURF_AP0_SWP_32BPP)
                                    & ~RADEON_NONSURF_AP0_SWP_16BPP);
 #endif
-       RADEONCopyMungedData(buf + (top * srcPitch) + left, buf + s2offset,
-                            buf + s3offset, dst_start, srcPitch, srcPitch2,
-                            dstPitch, nlines, npixels);
+       YV12toUYVY(buf + (top * srcPitch) + left, buf + s2offset,
+                  buf + s3offset, dst_start, srcPitch, srcPitch2,
+                  dstPitch, nlines, npixels);
        break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
Index: programs/Xserver/hw/xfree86/drivers/mga/mga_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/mga/mga_video.c,v
retrieving revision 1.29
diff -u -r1.29 mga_video.c
--- programs/Xserver/hw/xfree86/drivers/mga/mga_video.c 2001/12/26 14:54:04     1.29
+++ programs/Xserver/hw/xfree86/drivers/mga/mga_video.c 2003/02/24 01:26:25
@@ -20,6 +20,7 @@
 #include "xaalocal.h"
 #include "dixstruct.h"
 #include "fourcc.h"
+#include "xvconv.h"
 
 #define OFF_DELAY      250  /* milliseconds */
 #define FREE_DELAY     15000
@@ -591,53 +592,6 @@
     }
 }
 
-static void
-MGACopyMungedData(
-   unsigned char *src1,
-   unsigned char *src2,
-   unsigned char *src3,
-   unsigned char *dst1,
-   int srcPitch,
-   int srcPitch2,
-   int dstPitch,
-   int h,
-   int w
-){
-   CARD32 *dst;
-   CARD8 *s1, *s2, *s3;
-   int i, j;
-
-   w >>= 1;
-
-   for(j = 0; j < h; j++) {
-        dst = (CARD32*)dst1;
-        s1 = src1;  s2 = src2;  s3 = src3;
-        i = w;
-        while(i > 4) {
-           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-           dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
-           dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
-           dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
-           dst += 4; s2 += 4; s3 += 4; s1 += 8;
-           i -= 4;
-        }
-
-        while(i--) {
-           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-           dst++; s2++; s3++;
-           s1 += 2;
-        }
-
-        dst1 += dstPitch;
-        src1 += srcPitch;
-        if(j & 1) {
-            src2 += srcPitch2;
-            src3 += srcPitch2;
-        }
-   }
-}
-
-
 static FBLinearPtr
 MGAAllocateMemory(
    ScrnInfoPtr pScrn,
@@ -939,9 +893,9 @@
           offset3 = tmp;
        }
        nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
-       MGACopyMungedData(buf + (top * srcPitch) + (left >> 1), 
-                         buf + offset2, buf + offset3, dst_start,
-                         srcPitch, srcPitch2, dstPitch, nlines, npixels);
+       YV12toUYVY(buf + (top * srcPitch) + (left >> 1), 
+                  buf + offset2, buf + offset3, dst_start,
+                  srcPitch, srcPitch2, dstPitch, nlines, npixels);
        break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
Index: programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c,v
retrieving revision 1.4
diff -u -r1.4 neo_video.c
--- programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c    2002/11/25 14:05:00     1.4
+++ programs/Xserver/hw/xfree86/drivers/neomagic/neo_video.c    2003/02/24 01:26:26
@@ -39,6 +39,7 @@
 #include "dixstruct.h"
 #include "xaa.h"
 #include "xaalocal.h"
+#include "xvconv.h"
 
 static XF86VideoAdaptorPtr NEOSetupVideo(ScreenPtr);
 
@@ -706,7 +707,7 @@
        offset2 += tmp;
        offset3 += tmp;
        nLines = ((((y2 + 0xFFFF) >> 16) + 1) & ~1) - top;
-       NEOCopyYV12Data(buf + (top * srcPitch) + (left >> 1), buf + offset2, 
+       YV12toYUV2(buf + (top * srcPitch) + (left >> 1), buf + offset2, 
                        buf + offset3, dstStart, srcPitch, srcPitch2, 
                        dstPitch, nLines, nPixels);
        break;
@@ -1026,37 +1027,6 @@
        memcpy(dst, src, width);
        src += srcPitch;
        dst += dstPitch;
-    }
-}
-
-static void
-NEOCopyYV12Data(unsigned char *src1, unsigned char *src2,
-               unsigned char *src3, unsigned char *dst,
-               int srcPitch1, int srcPitch2, int dstPitch,
-               int height, int width)
-{
-    CARD32 *pDst = (CARD32 *) dst;
-    int i;
-
-    width >>= 1;
-    height >>= 1;
-    dstPitch >>= 2;
-    while (--height >= 0){
-       for (i =0; i < width; i++){
-           pDst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
-               (src3[i] << 8) | (src2[i] << 24);
-       }
-       pDst += dstPitch;
-       src1 += srcPitch1;
-
-       for (i =0; i < width; i++){
-           pDst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
-               (src3[i] << 8) | (src2[i] << 24);
-       }
-       pDst += dstPitch;
-       src1 += srcPitch1;
-           src2 += srcPitch2;
-           src3 += srcPitch2;
     }
 }
 
Index: programs/Xserver/hw/xfree86/drivers/nv/nv_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/nv/nv_video.c,v
retrieving revision 1.11
diff -u -r1.11 nv_video.c
--- programs/Xserver/hw/xfree86/drivers/nv/nv_video.c   2002/11/26 23:41:59     1.11
+++ programs/Xserver/hw/xfree86/drivers/nv/nv_video.c   2003/02/24 01:26:26
@@ -16,6 +16,7 @@
 #include "xaalocal.h"
 #include "dixstruct.h"
 #include "fourcc.h"
+#include "xvconv.h"
 
 #include "nv_include.h"
 
@@ -634,66 +635,7 @@
         dst += dstPitch;
     }
 }
-/*
- * CopyMungedData
- */
-static void NVCopyData420
-(
-    unsigned char *src1,
-    unsigned char *src2,
-    unsigned char *src3,
-    unsigned char *dst1,
-    int            srcPitch,
-    int            srcPitch2,
-    int            dstPitch,
-    int            h,
-    int            w
-)
-{
-   CARD32 *dst;
-   CARD8 *s1, *s2, *s3;
-   int i, j;
-
-   w >>= 1;
-
-   for(j = 0; j < h; j++) {
-        dst = (CARD32*)dst1;
-        s1 = src1;  s2 = src2;  s3 = src3;
-        i = w;
-        while(i > 4) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-           dst[0] = (s1[0] << 24) | (s1[1] << 8) | (s3[0] << 16) | s2[0];
-           dst[1] = (s1[2] << 24) | (s1[3] << 8) | (s3[1] << 16) | s2[1];
-           dst[2] = (s1[4] << 24) | (s1[5] << 8) | (s3[2] << 16) | s2[2];
-           dst[3] = (s1[6] << 24) | (s1[7] << 8) | (s3[3] << 16) | s2[3];
-#else
-           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-           dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
-           dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
-           dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
-#endif
-           dst += 4; s2 += 4; s3 += 4; s1 += 8;
-           i -= 4;
-        }
 
-        while(i--) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-           dst[0] = (s1[0] << 24) | (s1[1] << 8) | (s3[0] << 16) | s2[0];
-#else
-           dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-#endif
-           dst++; s2++; s3++;
-           s1 += 2;
-        }
-
-        dst1 += dstPitch;
-        src1 += srcPitch;
-        if(j & 1) {
-            src2 += srcPitch2;
-            src3 += srcPitch2;
-        }
-   }
-}
 /*
  * PutImage
  */
@@ -842,7 +784,7 @@
            s3offset = tmp;
         }
         nlines = ((((yb + 0xffff) >> 16) + 1) & ~1) - top;
-        NVCopyData420(buf + (top * srcPitch) + left, buf + s2offset,
+        YV12toUYVY(buf + (top * srcPitch) + left, buf + s2offset,
                            buf + s3offset, dst_start, srcPitch, srcPitch2,
                            dstPitch, nlines, npixels);
         break;
Index: programs/Xserver/hw/xfree86/drivers/s3/s3_video.c
===================================================================
RCS file: /cvs/xc/programs/Xserver/hw/xfree86/drivers/s3/s3_video.c,v
retrieving revision 1.2
diff -u -r1.2 s3_video.c
--- programs/Xserver/hw/xfree86/drivers/s3/s3_video.c   2001/08/15 11:54:27     1.2
+++ programs/Xserver/hw/xfree86/drivers/s3/s3_video.c   2003/02/24 01:26:27
@@ -31,6 +31,7 @@
 #include "xf86_ansic.h"
 
 #include "compiler.h"
+#include "xvconv.h"
 
 #include "s3.h"
 #include "s3_reg.h"
@@ -180,33 +181,6 @@
 }
 
 
-static void S3CopyMungedData(unsigned char *src1, unsigned char *src2,
-                            unsigned char *src3, unsigned char *dst1,
-                            int srcPitch, int srcPitch2, int dstPitch,
-                            int h, int w)
-{
-       CARD32 *dst = (CARD32*)dst1;
-       int i, j;
-
-       dstPitch >>= 2;
-       w >>= 1;
-
-       for(j = 0; j < h; j++) {
-               for(i = 0; i < w; i++) {
-                       dst[i] = src1[i << 1] | (src1[(i << 1) + 1] << 16) |
-                                (src3[i] << 8) | (src2[i] << 24);
-               }
-               dst += dstPitch;
-               src1 += srcPitch;
-               if(j & 1) {
-                       src2 += srcPitch2;
-                       src3 += srcPitch2;
-                       }
-       }
-}
-
-
-
 static void S3ResetVideoOverlay(ScrnInfoPtr pScrn)
 {
 }
@@ -577,7 +551,7 @@
            offset3 = tmp;
         }
         nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
-        S3CopyMungedData(buf + (top * srcPitch) + (left >> 1),
+        YV12toUYVY(buf + (top * srcPitch) + (left >> 1),
                          buf + offset2, buf + offset3, dst_start,
                          srcPitch, srcPitch2, dstPitch, nlines, npixels);
         once2 = 0;

Reply via email to