[libav-devel] [PATCH] swscale: split yuv2packed1 functions.

Ronald S. Bultje Sun, 19 Feb 2012 17:14:15 -0800

The function actually does two things: scaling unscaled input (or point-
scaled input) into one packed-pixel output line (e.g. YUYV or RGBA), or
scaling one line of luma/alpha (unscaled) and two lines of chroma (averaged)
into one packed-pixel output line. This commit splits this big function
into two functions, each of which does one of the above things.
---
 libswscale/output.c               |  130 +++++++++++++++++++++++++++++--------
 libswscale/swscale.c              |   16 +++--
 libswscale/swscale_internal.h     |   46 ++++++++++---
 libswscale/x86/swscale_template.c |  120 +++++++++++++++++++++++-----------
 4 files changed, 229 insertions(+), 83 deletions(-)


diff --git a/libswscale/output.c b/libswscale/output.c
index aa73813..93f1dbf 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -393,13 +393,23 @@ static void name ## ext ## _2_c(SwsContext *c, const 
int16_t *buf[2], \
                                   dest, dstW, yalpha, uvalpha, y, fmt); \
 } \
  \
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *buf0, \
                                 const int16_t *ubuf[2], const int16_t 
*vbuf[2], \
                                 const int16_t *abuf0, uint8_t *dest, int dstW, 
\
-                                int uvalpha, int y) \
+                                int y) \
+{ \
+    name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, \
+                                     abuf0, dest, dstW, \
+                                     y, fmt); \
+} \
+\
+static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+                                const int16_t *ubuf, const int16_t *vbuf, \
+                                const int16_t *abuf0, uint8_t *dest, int dstW, 
\
+                                int y) \
 { \
     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
-                                  abuf0, dest, dstW, uvalpha, \
+                                  abuf0, dest, dstW, \
                                   y, fmt); \
 }
 
@@ -481,14 +491,12 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
 
 static av_always_inline void
 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *ubuf0, const int16_t *vbuf0,
                      const int16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, int y, enum PixelFormat target)
+                     int y, enum PixelFormat target)
 {
-    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
 
-    if (uvalpha < 2048) {
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 = buf0[i * 2]     >> 7;
             int Y2 = buf0[i * 2 + 1] >> 7;
@@ -497,7 +505,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
 
             output_pixels(i * 4, Y1, U, Y2, V);
         }
-    } else {
+}
+
+static av_always_inline void
+yuv2422_1avg_c_template(SwsContext *c, const int16_t *buf0,
+                        const int16_t *ubuf[2], const int16_t *vbuf[2],
+                        const int16_t *abuf0, uint8_t *dest, int dstW,
+                        int y, enum PixelFormat target)
+{
+    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+    int i;
+
         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 =  buf0[i * 2]          >> 7;
@@ -507,7 +525,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
 
             output_pixels(i * 4, Y1, U, Y2, V);
         }
-    }
 }
 
 #undef output_pixels
@@ -627,14 +644,12 @@ yuv2rgb48_2_c_template(SwsContext *c, const int32_t 
*buf[2],
 
 static av_always_inline void
 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
-                       const int32_t *ubuf[2], const int32_t *vbuf[2],
+                       const int32_t *ubuf0, const int32_t *vbuf0,
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum PixelFormat target)
+                       int y, enum PixelFormat target)
 {
-    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
 
-    if (uvalpha < 2048) {
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 = (buf0[i * 2]    ) >> 2;
             int Y2 = (buf0[i * 2 + 1]) >> 2;
@@ -661,7 +676,17 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
             dest += 6;
         }
-    } else {
+}
+
+static av_always_inline void
+yuv2rgb48_1avg_c_template(SwsContext *c, const int32_t *buf0,
+                          const int32_t *ubuf[2], const int32_t *vbuf[2],
+                          const int32_t *abuf0, uint16_t *dest, int dstW,
+                          int y, enum PixelFormat target)
+{
+    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+    int i;
+
         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 = (buf0[i * 2]    ) >> 2;
@@ -689,7 +714,6 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
             dest += 6;
         }
-    }
 }
 
 #undef output_pixel
@@ -729,17 +753,31 @@ static void name ## ext ## _2_c(SwsContext *c, const 
int16_t *_buf[2], \
 } \
  \
 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
+                        const int16_t *_ubuf, const int16_t *_vbuf, \
+                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
+                        int y) \
+{ \
+    const int32_t *buf0  = (const int32_t *) _buf0, \
+                  *ubuf0 = (const int32_t *) _ubuf, \
+                  *vbuf0 = (const int32_t *) _vbuf, \
+                  *abuf0 = (const int32_t *) _abuf0; \
+    uint16_t *dest = (uint16_t *) _dest; \
+    name ## base ## _1_c_template(c, buf0, ubuf0, vbuf0, abuf0, dest, \
+                                  dstW, y, fmt); \
+} \
+\
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *_buf0, \
                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
-                        int uvalpha, int y) \
+                        int y) \
 { \
     const int32_t *buf0  = (const int32_t *)  _buf0, \
                  **ubuf  = (const int32_t **) _ubuf, \
                  **vbuf  = (const int32_t **) _vbuf, \
                   *abuf0 = (const int32_t *)  _abuf0; \
     uint16_t *dest = (uint16_t *) _dest; \
-    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt); \
+    name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+                                     dstW, y, fmt); \
 }
 
 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
@@ -966,15 +1004,13 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t 
*buf[2],
 
 static av_always_inline void
 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
-                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *ubuf0, const int16_t *vbuf0,
                      const int16_t *abuf0, uint8_t *dest, int dstW,
-                     int uvalpha, int y, enum PixelFormat target,
+                     int y, enum PixelFormat target,
                      int hasAlpha)
 {
-    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
 
-    if (uvalpha < 2048) {
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 = buf0[i * 2]     >> 7;
             int Y2 = buf0[i * 2 + 1] >> 7;
@@ -993,7 +1029,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 
0,
                           r, g, b, y, target, hasAlpha);
         }
-    } else {
+}
+
+static av_always_inline void
+yuv2rgb_1avg_c_template(SwsContext *c, const int16_t *buf0,
+                        const int16_t *ubuf[2], const int16_t *vbuf[2],
+                        const int16_t *abuf0, uint8_t *dest, int dstW,
+                        int y, enum PixelFormat target,
+                        int hasAlpha)
+{
+    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+    int i;
+
         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 =  buf0[i * 2]          >> 7;
@@ -1013,7 +1060,6 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 
0,
                           r, g, b, y, target, hasAlpha);
         }
-    }
 }
 
 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
@@ -1040,12 +1086,21 @@ static void name ## ext ## _2_c(SwsContext *c, const 
int16_t *buf[2], \
 } \
  \
 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+                                const int16_t *ubuf0, const int16_t *vbuf0, \
+                                const int16_t *abuf0, uint8_t *dest, int dstW, 
\
+                                int y) \
+{ \
+    name ## base ## _1_c_template(c, buf0, ubuf0, vbuf0, abuf0, dest, \
+                                  dstW, y, fmt, hasAlpha); \
+} \
+\
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *buf0, \
                                 const int16_t *ubuf[2], const int16_t 
*vbuf[2], \
                                 const int16_t *abuf0, uint8_t *dest, int dstW, 
\
-                                int uvalpha, int y) \
+                                int y) \
 { \
-    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha); \
+    name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+                                     dstW, y, fmt, hasAlpha); \
 }
 
 #if CONFIG_SMALL
@@ -1183,6 +1238,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
                               yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1,
+                              yuv2packed1avg_fn *yuv2packed1avg,
                               yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX)
 {
@@ -1275,21 +1331,25 @@ void ff_sws_init_output_funcs(SwsContext *c,
         switch (dstFormat) {
         case PIX_FMT_RGB48LE:
             *yuv2packed1 = yuv2rgb48le_1_c;
+            *yuv2packed1avg = yuv2rgb48le_1avg_c;
             *yuv2packed2 = yuv2rgb48le_2_c;
             *yuv2packedX = yuv2rgb48le_X_c;
             break;
         case PIX_FMT_RGB48BE:
             *yuv2packed1 = yuv2rgb48be_1_c;
+            *yuv2packed1avg = yuv2rgb48be_1avg_c;
             *yuv2packed2 = yuv2rgb48be_2_c;
             *yuv2packedX = yuv2rgb48be_X_c;
             break;
         case PIX_FMT_BGR48LE:
             *yuv2packed1 = yuv2bgr48le_1_c;
+            *yuv2packed1avg = yuv2bgr48le_1avg_c;
             *yuv2packed2 = yuv2bgr48le_2_c;
             *yuv2packedX = yuv2bgr48le_X_c;
             break;
         case PIX_FMT_BGR48BE:
             *yuv2packed1 = yuv2bgr48be_1_c;
+            *yuv2packed1avg = yuv2bgr48be_1avg_c;
             *yuv2packed2 = yuv2bgr48be_2_c;
             *yuv2packedX = yuv2bgr48be_X_c;
             break;
@@ -1297,18 +1357,21 @@ void ff_sws_init_output_funcs(SwsContext *c,
         case PIX_FMT_BGR32:
 #if CONFIG_SMALL
             *yuv2packed1 = yuv2rgb32_1_c;
+            *yuv2packed1avg = yuv2rgb32_1avg_c;
             *yuv2packed2 = yuv2rgb32_2_c;
             *yuv2packedX = yuv2rgb32_X_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packed1 = yuv2rgba32_1_c;
+                    *yuv2packed1avg = yuv2rgba32_1avg_c;
                     *yuv2packed2 = yuv2rgba32_2_c;
                     *yuv2packedX = yuv2rgba32_X_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packed1 = yuv2rgbx32_1_c;
+                    *yuv2packed1avg = yuv2rgbx32_1avg_c;
                     *yuv2packed2 = yuv2rgbx32_2_c;
                     *yuv2packedX = yuv2rgbx32_X_c;
                 }
@@ -1318,18 +1381,21 @@ void ff_sws_init_output_funcs(SwsContext *c,
         case PIX_FMT_BGR32_1:
 #if CONFIG_SMALL
                 *yuv2packed1 = yuv2rgb32_1_1_c;
+                *yuv2packed1avg = yuv2rgb32_1_1avg_c;
                 *yuv2packed2 = yuv2rgb32_1_2_c;
                 *yuv2packedX = yuv2rgb32_1_X_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packed1 = yuv2rgba32_1_1_c;
+                    *yuv2packed1avg = yuv2rgba32_1_1avg_c;
                     *yuv2packed2 = yuv2rgba32_1_2_c;
                     *yuv2packedX = yuv2rgba32_1_X_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packed1 = yuv2rgbx32_1_1_c;
+                    *yuv2packed1avg = yuv2rgbx32_1_1avg_c;
                     *yuv2packed2 = yuv2rgbx32_1_2_c;
                     *yuv2packedX = yuv2rgbx32_1_X_c;
                 }
@@ -1337,11 +1403,13 @@ void ff_sws_init_output_funcs(SwsContext *c,
                 break;
         case PIX_FMT_RGB24:
             *yuv2packed1 = yuv2rgb24_1_c;
+            *yuv2packed1avg = yuv2rgb24_1avg_c;
             *yuv2packed2 = yuv2rgb24_2_c;
             *yuv2packedX = yuv2rgb24_X_c;
             break;
         case PIX_FMT_BGR24:
             *yuv2packed1 = yuv2bgr24_1_c;
+            *yuv2packed1avg = yuv2bgr24_1avg_c;
             *yuv2packed2 = yuv2bgr24_2_c;
             *yuv2packedX = yuv2bgr24_X_c;
             break;
@@ -1350,6 +1418,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
         case PIX_FMT_BGR565LE:
         case PIX_FMT_BGR565BE:
             *yuv2packed1 = yuv2rgb16_1_c;
+            *yuv2packed1avg = yuv2rgb16_1avg_c;
             *yuv2packed2 = yuv2rgb16_2_c;
             *yuv2packedX = yuv2rgb16_X_c;
             break;
@@ -1358,6 +1427,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
         case PIX_FMT_BGR555LE:
         case PIX_FMT_BGR555BE:
             *yuv2packed1 = yuv2rgb15_1_c;
+            *yuv2packed1avg = yuv2rgb15_1avg_c;
             *yuv2packed2 = yuv2rgb15_2_c;
             *yuv2packedX = yuv2rgb15_X_c;
             break;
@@ -1366,24 +1436,28 @@ void ff_sws_init_output_funcs(SwsContext *c,
         case PIX_FMT_BGR444LE:
         case PIX_FMT_BGR444BE:
             *yuv2packed1 = yuv2rgb12_1_c;
+            *yuv2packed1avg = yuv2rgb12_1avg_c;
             *yuv2packed2 = yuv2rgb12_2_c;
             *yuv2packedX = yuv2rgb12_X_c;
             break;
         case PIX_FMT_RGB8:
         case PIX_FMT_BGR8:
             *yuv2packed1 = yuv2rgb8_1_c;
+            *yuv2packed1avg = yuv2rgb8_1avg_c;
             *yuv2packed2 = yuv2rgb8_2_c;
             *yuv2packedX = yuv2rgb8_X_c;
             break;
         case PIX_FMT_RGB4:
         case PIX_FMT_BGR4:
             *yuv2packed1 = yuv2rgb4_1_c;
+            *yuv2packed1avg = yuv2rgb4_1avg_c;
             *yuv2packed2 = yuv2rgb4_2_c;
             *yuv2packedX = yuv2rgb4_X_c;
             break;
         case PIX_FMT_RGB4_BYTE:
         case PIX_FMT_BGR4_BYTE:
             *yuv2packed1 = yuv2rgb4b_1_c;
+            *yuv2packed1avg = yuv2rgb4b_1avg_c;
             *yuv2packed2 = yuv2rgb4b_2_c;
             *yuv2packedX = yuv2rgb4b_X_c;
             break;
@@ -1400,11 +1474,13 @@ void ff_sws_init_output_funcs(SwsContext *c,
         break;
     case PIX_FMT_YUYV422:
         *yuv2packed1 = yuv2yuyv422_1_c;
+        *yuv2packed1avg = yuv2yuyv422_1avg_c;
         *yuv2packed2 = yuv2yuyv422_2_c;
         *yuv2packedX = yuv2yuyv422_X_c;
         break;
     case PIX_FMT_UYVY422:
         *yuv2packed1 = yuv2uyvy422_1_c;
+        *yuv2packed1avg = yuv2uyvy422_1avg_c;
         *yuv2packed2 = yuv2uyvy422_2_c;
         *yuv2packedX = yuv2uyvy422_X_c;
         break;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 33f74af..2b2691d 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -342,6 +342,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     yuv2planarX_fn yuv2planeX = c->yuv2planeX;
     yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+    yuv2packed1avg_fn yuv2packed1avg = c->yuv2packed1avg;
     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
     int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat) ||
@@ -508,7 +509,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
         if (dstY >= dstH-2) {
             // hmm looks like we can't use MMX here without overwriting this 
array's tail
             ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
-                                     &yuv2packed1, &yuv2packed2, &yuv2packedX);
+                                     &yuv2packed1, &yuv2packed1avg,
+                                     &yuv2packed2, &yuv2packedX);
         }
 
         {
@@ -596,11 +598,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             } else {
                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + 
vLumBufSize*2);
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + 
vChrBufSize*2);
-                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 
2) { //unscaled RGB
-                    int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * 
dstY + 1];
-                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 
1) { //unscaled RGB
+                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *chrVSrcPtr,
                                 alpPixBuf ? *alpSrcPtr : NULL,
-                                dest[0], dstW, chrAlpha, dstY);
+                                dest[0], dstW, dstY);
+                } else if (c->yuv2packed1avg && vLumFilterSize == 1 && 
vChrFilterSize == 2) { //unscaled RGB
+                    yuv2packed1avg(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+                                   alpPixBuf ? *alpSrcPtr : NULL,
+                                   dest[0], dstW, dstY);
                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && 
vChrFilterSize == 2) { //bilinear upscale RGB
                     int lumAlpha = vLumFilter[2 * dstY + 1];
                     int chrAlpha = vChrFilter[2 * dstY + 1];
@@ -647,6 +652,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
 
     ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                              &c->yuv2nv12cX, &c->yuv2packed1,
+                             &c->yuv2packed1avg,
                              &c->yuv2packed2, &c->yuv2packedX);
 
     ff_sws_init_input_funcs(c);
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index bc36826..c059a4f 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -116,8 +116,7 @@ typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
 
 /**
  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
- * output without any additional vertical scaling (or point-scaling). Note
- * that this function may do chroma scaling, see the "uvalpha" argument.
+ * output without any additional vertical scaling (or point-scaling).
  *
  * @param c       SWS scaling context
  * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
@@ -132,22 +131,45 @@ typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
  *                uint16_t
  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
  *                to write into dest[]
- * @param uvalpha chroma scaling coefficient for the second line of chroma
- *                pixels, either 2048 or 0. If 0, one chroma input is used
- *                for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
- *                is set, it generates 1 output pixel). If 2048, two chroma
- *                input pixels should be averaged for 2 output pixels (this
- *                only happens if SWS_FLAG_FULL_CHR_INT is not set)
  * @param y       vertical line number for this output. This does not need
  *                to be used to calculate the offset in the destination,
  *                but can be used to generate comfort noise using dithering
  *                for some output formats.
  */
 typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc,
-                               const int16_t *chrUSrc[2],
-                               const int16_t *chrVSrc[2],
+                               const int16_t *chrUSrc,
+                               const int16_t *chrVSrc,
                                const int16_t *alpSrc, uint8_t *dest,
-                               int dstW, int uvalpha, int y);
+                               int dstW, int y);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output without any additional vertical scaling (or point-scaling). The
+ * chroma pixel is interpolated from two source lines, by averaging the
+ * two source values.
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc two lines of scaled chroma (U) source data, 15bit for
+ *                8-10bit output, 19-bit for 16bit output (in int32_t)
+ * @param chrVSrc two lines of scaled chroma (V) source data, 15bit for
+ *                8-10bit output, 19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the output plane. For 16bit output, this is
+ *                uint16_t
+ * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+ *                to write into dest[]
+ * @param y       vertical line number for this output. This does not need
+ *                to be used to calculate the offset in the destination,
+ *                but can be used to generate comfort noise using dithering
+ *                for some output formats.
+ */
+typedef void (*yuv2packed1avg_fn)(struct SwsContext *c, const int16_t *lumSrc,
+                                  const int16_t *chrUSrc[2],
+                                  const int16_t *chrVSrc[2],
+                                  const int16_t *alpSrc, uint8_t *dest,
+                                  int dstW, int y);
 /**
  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
  * output by doing bilinear scaling between two input lines.
@@ -423,6 +445,7 @@ typedef struct SwsContext {
     yuv2planarX_fn yuv2planeX;
     yuv2interleavedX_fn yuv2nv12cX;
     yuv2packed1_fn yuv2packed1;
+    yuv2packed1avg_fn yuv2packed1avg;
     yuv2packed2_fn yuv2packed2;
     yuv2packedX_fn yuv2packedX;
 
@@ -656,6 +679,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
                               yuv2planarX_fn *yuv2planeX,
                               yuv2interleavedX_fn *yuv2nv12cX,
                               yuv2packed1_fn *yuv2packed1,
+                              yuv2packed1avg_fn *yuv2packed1avg,
                               yuv2packed2_fn *yuv2packed2,
                               yuv2packedX_fn *yuv2packedX);
 void ff_sws_init_swScale_altivec(SwsContext *c);
diff --git a/libswscale/x86/swscale_template.c 
b/libswscale/x86/swscale_template.c
index 4db3fb3..18abefc 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1088,15 +1088,14 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const 
int16_t *buf[2],
  * YV12 to RGB without scaling or interpolating
  */
 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *ubuf0, const int16_t *vbuf0,
                                 const int16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, int y)
+                                int dstW, int y)
 {
-    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf1 = ubuf0;
 
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 
0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a 
bit faster
         if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1124,8 +1123,17 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const 
int16_t *buf0,
                    "a" (&c->redDither)
             );
         }
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb32_1avg)(SwsContext *c, const int16_t *buf0,
+                                   const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                   const int16_t *abuf0, uint8_t *dest,
+                                   int dstW, int y)
+{
+    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
         if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
             __asm__ volatile(
                 "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@@ -1153,19 +1161,17 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const 
int16_t *buf0,
                    "a" (&c->redDither)
             );
         }
-    }
 }
 
 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
-                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *ubuf0, const int16_t *vbuf0,
                                 const int16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, int y)
+                                int dstW, int y)
 {
-    const int16_t *ubuf0 = ubuf[0];
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf1 = ubuf0;
 
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 
0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a 
bit faster
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1178,8 +1184,17 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2bgr24_1avg)(SwsContext *c, const int16_t *buf0,
+                                   const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                   const int16_t *abuf0, uint8_t *dest,
+                                   int dstW, int y)
+{
+    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1192,19 +1207,17 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    }
 }
 
 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                 const int16_t *ubuf0, const int16_t *vbuf0,
                                  const int16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, int y)
+                                 int dstW, int y)
 {
-    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf0;
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 
0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a 
bit faster
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1223,8 +1236,17 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb555_1avg)(SwsContext *c, const int16_t *buf0,
+                                    const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                    const int16_t *abuf0, uint8_t *dest,
+                                    int dstW, int y)
+{
+    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1243,19 +1265,17 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    }
 }
 
 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                 const int16_t *ubuf0, const int16_t *vbuf0,
                                  const int16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, int y)
+                                 int dstW, int y)
 {
-    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf0;
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 
0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a 
bit faster
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1274,8 +1294,17 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb565_1avg)(SwsContext *c, const int16_t *buf0,
+                                    const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                    const int16_t *abuf0, uint8_t *dest,
+                                    int dstW, int y)
+{
+    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1294,7 +1323,6 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    }
 }
 
 #define REAL_YSCALEYUV2PACKED1(index, c) \
@@ -1335,15 +1363,14 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const 
int16_t *buf0,
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
-                                  const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                  const int16_t *ubuf0, const int16_t *vbuf0,
                                   const int16_t *abuf0, uint8_t *dest,
-                                  int dstW, int uvalpha, int y)
+                                  int dstW, int y)
 {
-    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf0;
     const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 
0.5 pixels) but it is a bit faster
-        const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a 
bit faster
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1355,8 +1382,17 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    } else {
-        const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2yuyv422_1avg)(SwsContext *c, const int16_t *buf0,
+                                     const int16_t *ubuf[2], const int16_t 
*vbuf[2],
+                                     const int16_t *abuf0, uint8_t *dest,
+                                     int dstW, int y)
+{
+    const int16_t *ubuf0 = ubuf[0];
+    const int16_t *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1368,7 +1404,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const 
int16_t *buf0,
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
-    }
 }
 
 #if COMPILE_TEMPLATE_MMX2
@@ -1590,22 +1625,27 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext 
*c)
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:
                 c->yuv2packed1 = RENAME(yuv2rgb32_1);
+                c->yuv2packed1avg = RENAME(yuv2rgb32_1avg);
                 c->yuv2packed2 = RENAME(yuv2rgb32_2);
                 break;
             case PIX_FMT_BGR24:
                 c->yuv2packed1 = RENAME(yuv2bgr24_1);
+                c->yuv2packed1avg = RENAME(yuv2bgr24_1avg);
                 c->yuv2packed2 = RENAME(yuv2bgr24_2);
                 break;
             case PIX_FMT_RGB555:
                 c->yuv2packed1 = RENAME(yuv2rgb555_1);
+                c->yuv2packed1avg = RENAME(yuv2rgb555_1avg);
                 c->yuv2packed2 = RENAME(yuv2rgb555_2);
                 break;
             case PIX_FMT_RGB565:
                 c->yuv2packed1 = RENAME(yuv2rgb565_1);
+                c->yuv2packed1avg = RENAME(yuv2rgb565_1avg);
                 c->yuv2packed2 = RENAME(yuv2rgb565_2);
                 break;
             case PIX_FMT_YUYV422:
                 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
+                c->yuv2packed1avg = RENAME(yuv2yuyv422_1avg);
                 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
                 break;
             default:
-- 
1.7.2.1

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] swscale: split yuv2packed1 functions.

Reply via email to