The function actually does two things: scaling unscaled (or point-scaled)
input into one packed-pixel output line (e.g. YUYV or RGBA), or converting
one unscaled line of luma/alpha plus the average of two lines of chroma into
one packed-pixel output line. This commit splits this big function into two
functions, each of which does one of these two things.
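
For reference, the difference between the two resulting callbacks comes down
to the chroma handling. Schematically (a sketch only, using the 8-bit 4:2:2
case; the shift amounts differ for other output formats):

    /* yuv2packed1: one chroma input line, used as-is */
    U = ubuf0[i] >> 7;
    V = vbuf0[i] >> 7;

    /* yuv2packed1avg: two chroma input lines, averaged */
    U = (ubuf0[i] + ubuf1[i]) >> 8;
    V = (vbuf0[i] + vbuf1[i]) >> 8;

The caller in swscale.c now dispatches on vChrFilterSize: 1 selects
yuv2packed1, 2 selects yuv2packed1avg.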
---
libswscale/output.c | 130 +++++++++++++++++++++++++++++--------
libswscale/swscale.c | 16 +++--
libswscale/swscale_internal.h | 46 ++++++++++---
libswscale/x86/swscale_template.c | 120 +++++++++++++++++++++++-----------
4 files changed, 229 insertions(+), 83 deletions(-)
diff --git a/libswscale/output.c b/libswscale/output.c
index aa73813..93f1dbf 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -393,13 +393,23 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
dest, dstW, yalpha, uvalpha, y, fmt); \
} \
\
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
- int uvalpha, int y) \
+ int y) \
+{ \
+ name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, \
+ abuf0, dest, dstW, \
+ y, fmt); \
+} \
+\
+static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+ const int16_t *ubuf, const int16_t *vbuf, \
+                                const int16_t *abuf0, uint8_t *dest, int dstW, \
+ int y) \
{ \
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
- abuf0, dest, dstW, uvalpha, \
+ abuf0, dest, dstW, \
y, fmt); \
}
@@ -481,14 +491,12 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
- const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest, int dstW,
- int uvalpha, int y, enum PixelFormat target)
+ int y, enum PixelFormat target)
{
- const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
- if (uvalpha < 2048) {
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
@@ -497,7 +505,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
output_pixels(i * 4, Y1, U, Y2, V);
}
- } else {
+}
+
+static av_always_inline void
+yuv2422_1avg_c_template(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest, int dstW,
+ int y, enum PixelFormat target)
+{
+ const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+ int i;
+
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
@@ -507,7 +525,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
output_pixels(i * 4, Y1, U, Y2, V);
}
- }
}
#undef output_pixels
@@ -627,14 +644,12 @@ yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
static av_always_inline void
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
- const int32_t *ubuf[2], const int32_t *vbuf[2],
+ const int32_t *ubuf0, const int32_t *vbuf0,
const int32_t *abuf0, uint16_t *dest, int dstW,
- int uvalpha, int y, enum PixelFormat target)
+ int y, enum PixelFormat target)
{
- const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
- if (uvalpha < 2048) {
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = (buf0[i * 2] ) >> 2;
int Y2 = (buf0[i * 2 + 1]) >> 2;
@@ -661,7 +676,17 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
dest += 6;
}
- } else {
+}
+
+static av_always_inline void
+yuv2rgb48_1avg_c_template(SwsContext *c, const int32_t *buf0,
+ const int32_t *ubuf[2], const int32_t *vbuf[2],
+ const int32_t *abuf0, uint16_t *dest, int dstW,
+ int y, enum PixelFormat target)
+{
+ const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+ int i;
+
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = (buf0[i * 2] ) >> 2;
@@ -689,7 +714,6 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
dest += 6;
}
- }
}
#undef output_pixel
@@ -729,17 +753,31 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
+ const int16_t *_ubuf, const int16_t *_vbuf, \
+ const int16_t *_abuf0, uint8_t *_dest, int dstW, \
+ int y) \
+{ \
+ const int32_t *buf0 = (const int32_t *) _buf0, \
+ *ubuf0 = (const int32_t *) _ubuf, \
+ *vbuf0 = (const int32_t *) _vbuf, \
+ *abuf0 = (const int32_t *) _abuf0; \
+ uint16_t *dest = (uint16_t *) _dest; \
+ name ## base ## _1_c_template(c, buf0, ubuf0, vbuf0, abuf0, dest, \
+ dstW, y, fmt); \
+} \
+\
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *_buf0, \
const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
const int16_t *_abuf0, uint8_t *_dest, int dstW, \
- int uvalpha, int y) \
+ int y) \
{ \
const int32_t *buf0 = (const int32_t *) _buf0, \
**ubuf = (const int32_t **) _ubuf, \
**vbuf = (const int32_t **) _vbuf, \
*abuf0 = (const int32_t *) _abuf0; \
uint16_t *dest = (uint16_t *) _dest; \
- name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
- dstW, uvalpha, y, fmt); \
+ name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+ dstW, y, fmt); \
}
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
@@ -966,15 +1004,13 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
- const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest, int dstW,
- int uvalpha, int y, enum PixelFormat target,
+ int y, enum PixelFormat target,
int hasAlpha)
{
- const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
- if (uvalpha < 2048) {
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
@@ -993,7 +1029,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
}
- } else {
+}
+
+static av_always_inline void
+yuv2rgb_1avg_c_template(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest, int dstW,
+ int y, enum PixelFormat target,
+ int hasAlpha)
+{
+ const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+ int i;
+
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < (dstW >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
@@ -1013,7 +1060,6 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
}
- }
}
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
@@ -1040,12 +1086,21 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
+ const int16_t *ubuf0, const int16_t *vbuf0, \
+                                const int16_t *abuf0, uint8_t *dest, int dstW, \
+ int y) \
+{ \
+ name ## base ## _1_c_template(c, buf0, ubuf0, vbuf0, abuf0, dest, \
+ dstW, y, fmt, hasAlpha); \
+} \
+\
+static void name ## ext ## _1avg_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
- int uvalpha, int y) \
+ int y) \
{ \
- name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
- dstW, uvalpha, y, fmt, hasAlpha); \
+ name ## base ## _1avg_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+ dstW, y, fmt, hasAlpha); \
}
#if CONFIG_SMALL
@@ -1183,6 +1238,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
yuv2planarX_fn *yuv2planeX,
yuv2interleavedX_fn *yuv2nv12cX,
yuv2packed1_fn *yuv2packed1,
+ yuv2packed1avg_fn *yuv2packed1avg,
yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX)
{
@@ -1275,21 +1331,25 @@ void ff_sws_init_output_funcs(SwsContext *c,
switch (dstFormat) {
case PIX_FMT_RGB48LE:
*yuv2packed1 = yuv2rgb48le_1_c;
+ *yuv2packed1avg = yuv2rgb48le_1avg_c;
*yuv2packed2 = yuv2rgb48le_2_c;
*yuv2packedX = yuv2rgb48le_X_c;
break;
case PIX_FMT_RGB48BE:
*yuv2packed1 = yuv2rgb48be_1_c;
+ *yuv2packed1avg = yuv2rgb48be_1avg_c;
*yuv2packed2 = yuv2rgb48be_2_c;
*yuv2packedX = yuv2rgb48be_X_c;
break;
case PIX_FMT_BGR48LE:
*yuv2packed1 = yuv2bgr48le_1_c;
+ *yuv2packed1avg = yuv2bgr48le_1avg_c;
*yuv2packed2 = yuv2bgr48le_2_c;
*yuv2packedX = yuv2bgr48le_X_c;
break;
case PIX_FMT_BGR48BE:
*yuv2packed1 = yuv2bgr48be_1_c;
+ *yuv2packed1avg = yuv2bgr48be_1avg_c;
*yuv2packed2 = yuv2bgr48be_2_c;
*yuv2packedX = yuv2bgr48be_X_c;
break;
@@ -1297,18 +1357,21 @@ void ff_sws_init_output_funcs(SwsContext *c,
case PIX_FMT_BGR32:
#if CONFIG_SMALL
*yuv2packed1 = yuv2rgb32_1_c;
+ *yuv2packed1avg = yuv2rgb32_1avg_c;
*yuv2packed2 = yuv2rgb32_2_c;
*yuv2packedX = yuv2rgb32_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packed1 = yuv2rgba32_1_c;
+ *yuv2packed1avg = yuv2rgba32_1avg_c;
*yuv2packed2 = yuv2rgba32_2_c;
*yuv2packedX = yuv2rgba32_X_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packed1 = yuv2rgbx32_1_c;
+ *yuv2packed1avg = yuv2rgbx32_1avg_c;
*yuv2packed2 = yuv2rgbx32_2_c;
*yuv2packedX = yuv2rgbx32_X_c;
}
@@ -1318,18 +1381,21 @@ void ff_sws_init_output_funcs(SwsContext *c,
case PIX_FMT_BGR32_1:
#if CONFIG_SMALL
*yuv2packed1 = yuv2rgb32_1_1_c;
+ *yuv2packed1avg = yuv2rgb32_1_1avg_c;
*yuv2packed2 = yuv2rgb32_1_2_c;
*yuv2packedX = yuv2rgb32_1_X_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packed1 = yuv2rgba32_1_1_c;
+ *yuv2packed1avg = yuv2rgba32_1_1avg_c;
*yuv2packed2 = yuv2rgba32_1_2_c;
*yuv2packedX = yuv2rgba32_1_X_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packed1 = yuv2rgbx32_1_1_c;
+ *yuv2packed1avg = yuv2rgbx32_1_1avg_c;
*yuv2packed2 = yuv2rgbx32_1_2_c;
*yuv2packedX = yuv2rgbx32_1_X_c;
}
@@ -1337,11 +1403,13 @@ void ff_sws_init_output_funcs(SwsContext *c,
break;
case PIX_FMT_RGB24:
*yuv2packed1 = yuv2rgb24_1_c;
+ *yuv2packed1avg = yuv2rgb24_1avg_c;
*yuv2packed2 = yuv2rgb24_2_c;
*yuv2packedX = yuv2rgb24_X_c;
break;
case PIX_FMT_BGR24:
*yuv2packed1 = yuv2bgr24_1_c;
+ *yuv2packed1avg = yuv2bgr24_1avg_c;
*yuv2packed2 = yuv2bgr24_2_c;
*yuv2packedX = yuv2bgr24_X_c;
break;
@@ -1350,6 +1418,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
case PIX_FMT_BGR565LE:
case PIX_FMT_BGR565BE:
*yuv2packed1 = yuv2rgb16_1_c;
+ *yuv2packed1avg = yuv2rgb16_1avg_c;
*yuv2packed2 = yuv2rgb16_2_c;
*yuv2packedX = yuv2rgb16_X_c;
break;
@@ -1358,6 +1427,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
case PIX_FMT_BGR555LE:
case PIX_FMT_BGR555BE:
*yuv2packed1 = yuv2rgb15_1_c;
+ *yuv2packed1avg = yuv2rgb15_1avg_c;
*yuv2packed2 = yuv2rgb15_2_c;
*yuv2packedX = yuv2rgb15_X_c;
break;
@@ -1366,24 +1436,28 @@ void ff_sws_init_output_funcs(SwsContext *c,
case PIX_FMT_BGR444LE:
case PIX_FMT_BGR444BE:
*yuv2packed1 = yuv2rgb12_1_c;
+ *yuv2packed1avg = yuv2rgb12_1avg_c;
*yuv2packed2 = yuv2rgb12_2_c;
*yuv2packedX = yuv2rgb12_X_c;
break;
case PIX_FMT_RGB8:
case PIX_FMT_BGR8:
*yuv2packed1 = yuv2rgb8_1_c;
+ *yuv2packed1avg = yuv2rgb8_1avg_c;
*yuv2packed2 = yuv2rgb8_2_c;
*yuv2packedX = yuv2rgb8_X_c;
break;
case PIX_FMT_RGB4:
case PIX_FMT_BGR4:
*yuv2packed1 = yuv2rgb4_1_c;
+ *yuv2packed1avg = yuv2rgb4_1avg_c;
*yuv2packed2 = yuv2rgb4_2_c;
*yuv2packedX = yuv2rgb4_X_c;
break;
case PIX_FMT_RGB4_BYTE:
case PIX_FMT_BGR4_BYTE:
*yuv2packed1 = yuv2rgb4b_1_c;
+ *yuv2packed1avg = yuv2rgb4b_1avg_c;
*yuv2packed2 = yuv2rgb4b_2_c;
*yuv2packedX = yuv2rgb4b_X_c;
break;
@@ -1400,11 +1474,13 @@ void ff_sws_init_output_funcs(SwsContext *c,
break;
case PIX_FMT_YUYV422:
*yuv2packed1 = yuv2yuyv422_1_c;
+ *yuv2packed1avg = yuv2yuyv422_1avg_c;
*yuv2packed2 = yuv2yuyv422_2_c;
*yuv2packedX = yuv2yuyv422_X_c;
break;
case PIX_FMT_UYVY422:
*yuv2packed1 = yuv2uyvy422_1_c;
+ *yuv2packed1avg = yuv2uyvy422_1avg_c;
*yuv2packed2 = yuv2uyvy422_2_c;
*yuv2packedX = yuv2uyvy422_X_c;
break;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 33f74af..2b2691d 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -342,6 +342,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
yuv2planarX_fn yuv2planeX = c->yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+ yuv2packed1avg_fn yuv2packed1avg = c->yuv2packed1avg;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX;
int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat) ||
@@ -508,7 +509,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
if (dstY >= dstH-2) {
            // hmm looks like we can't use MMX here without overwriting this array's tail
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
- &yuv2packed1, &yuv2packed2, &yuv2packedX);
+ &yuv2packed1, &yuv2packed1avg,
+ &yuv2packed2, &yuv2packedX);
}
{
@@ -596,11 +598,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
} else {
                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize <= 2) { //unscaled RGB
-                    int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
- yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 1) { //unscaled RGB
+ yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *chrVSrcPtr,
alpPixBuf ? *alpSrcPtr : NULL,
- dest[0], dstW, chrAlpha, dstY);
+ dest[0], dstW, dstY);
+                } else if (c->yuv2packed1avg && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+ yuv2packed1avg(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
+ alpPixBuf ? *alpSrcPtr : NULL,
+ dest[0], dstW, dstY);
                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
int lumAlpha = vLumFilter[2 * dstY + 1];
int chrAlpha = vChrFilter[2 * dstY + 1];
@@ -647,6 +652,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1,
+ &c->yuv2packed1avg,
&c->yuv2packed2, &c->yuv2packedX);
ff_sws_init_input_funcs(c);
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index bc36826..c059a4f 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -116,8 +116,7 @@ typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
- * output without any additional vertical scaling (or point-scaling). Note
- * that this function may do chroma scaling, see the "uvalpha" argument.
+ * output without any additional vertical scaling (or point-scaling).
*
* @param c SWS scaling context
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output,
@@ -132,22 +131,45 @@ typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
* uint16_t
* @param dstW width of lumSrc and alpSrc in pixels, number of pixels
* to write into dest[]
- * @param uvalpha chroma scaling coefficient for the second line of chroma
- * pixels, either 2048 or 0. If 0, one chroma input is used
- * for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
- * is set, it generates 1 output pixel). If 2048, two chroma
- * input pixels should be averaged for 2 output pixels (this
- * only happens if SWS_FLAG_FULL_CHR_INT is not set)
* @param y vertical line number for this output. This does not need
* to be used to calculate the offset in the destination,
* but can be used to generate comfort noise using dithering
* for some output formats.
*/
typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc,
- const int16_t *chrUSrc[2],
- const int16_t *chrVSrc[2],
+ const int16_t *chrUSrc,
+ const int16_t *chrVSrc,
const int16_t *alpSrc, uint8_t *dest,
- int dstW, int uvalpha, int y);
+ int dstW, int y);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output without any additional vertical scaling (or point-scaling). The
+ * chroma pixel is interpolated from two source lines, by averaging the
+ * two source values.
+ *
+ * @param c SWS scaling context
+ * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output,
+ * 19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ * 19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ * 19-bit for 16bit output (in int32_t)
+ * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output,
+ * 19-bit for 16bit output (in int32_t)
+ * @param dest pointer to the output plane. For 16bit output, this is
+ * uint16_t
+ * @param dstW width of lumSrc and alpSrc in pixels, number of pixels
+ * to write into dest[]
+ * @param y vertical line number for this output. This does not need
+ * to be used to calculate the offset in the destination,
+ * but can be used to generate comfort noise using dithering
+ * for some output formats.
+ */
+typedef void (*yuv2packed1avg_fn)(struct SwsContext *c, const int16_t *lumSrc,
+ const int16_t *chrUSrc[2],
+ const int16_t *chrVSrc[2],
+ const int16_t *alpSrc, uint8_t *dest,
+ int dstW, int y);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output by doing bilinear scaling between two input lines.
@@ -423,6 +445,7 @@ typedef struct SwsContext {
yuv2planarX_fn yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX;
yuv2packed1_fn yuv2packed1;
+ yuv2packed1avg_fn yuv2packed1avg;
yuv2packed2_fn yuv2packed2;
yuv2packedX_fn yuv2packedX;
@@ -656,6 +679,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
yuv2planarX_fn *yuv2planeX,
yuv2interleavedX_fn *yuv2nv12cX,
yuv2packed1_fn *yuv2packed1,
+ yuv2packed1avg_fn *yuv2packed1avg,
yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX);
void ff_sws_init_swScale_altivec(SwsContext *c);
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4db3fb3..18abefc 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1088,15 +1088,14 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
* YV12 to RGB without scaling or interpolating
*/
static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
- const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, int y)
+ int dstW, int y)
{
- const int16_t *ubuf0 = ubuf[0];
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf1 = ubuf0;
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1124,8 +1123,17 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"a" (&c->redDither)
);
}
- } else {
- const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb32_1avg)(SwsContext *c, const int16_t *buf0,
+                                   const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int y)
+{
+ const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1153,19 +1161,17 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"a" (&c->redDither)
);
}
- }
}
static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
- const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, int y)
+ int dstW, int y)
{
- const int16_t *ubuf0 = ubuf[0];
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf1 = ubuf0;
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1178,8 +1184,17 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- } else {
- const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2bgr24_1avg)(SwsContext *c, const int16_t *buf0,
+                                   const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int y)
+{
+ const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1192,19 +1207,17 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- }
}
static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, int y)
+ int dstW, int y)
{
- const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf0;
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1223,8 +1236,17 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- } else {
- const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb555_1avg)(SwsContext *c, const int16_t *buf0,
+                                    const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int y)
+{
+ const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1243,19 +1265,17 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- }
}
static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
-                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, int y)
+ int dstW, int y)
{
- const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf0;
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1274,8 +1294,17 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- } else {
- const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2rgb565_1avg)(SwsContext *c, const int16_t *buf0,
+                                    const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int y)
+{
+ const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1294,7 +1323,6 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- }
}
#define REAL_YSCALEYUV2PACKED1(index, c) \
@@ -1335,15 +1363,14 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
-                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *ubuf0, const int16_t *vbuf0,
const int16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, int y)
+ int dstW, int y)
{
- const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf0;
const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
- const int16_t *ubuf1 = ubuf[0];
+    // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1355,8 +1382,17 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- } else {
- const int16_t *ubuf1 = ubuf[1];
+}
+
+static void RENAME(yuv2yuyv422_1avg)(SwsContext *c, const int16_t *buf0,
+                                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int y)
+{
+ const int16_t *ubuf0 = ubuf[0];
+ const int16_t *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1368,7 +1404,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
);
- }
}
#if COMPILE_TEMPLATE_MMX2
@@ -1590,22 +1625,27 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
switch (c->dstFormat) {
case PIX_FMT_RGB32:
c->yuv2packed1 = RENAME(yuv2rgb32_1);
+ c->yuv2packed1avg = RENAME(yuv2rgb32_1avg);
c->yuv2packed2 = RENAME(yuv2rgb32_2);
break;
case PIX_FMT_BGR24:
c->yuv2packed1 = RENAME(yuv2bgr24_1);
+ c->yuv2packed1avg = RENAME(yuv2bgr24_1avg);
c->yuv2packed2 = RENAME(yuv2bgr24_2);
break;
case PIX_FMT_RGB555:
c->yuv2packed1 = RENAME(yuv2rgb555_1);
+ c->yuv2packed1avg = RENAME(yuv2rgb555_1avg);
c->yuv2packed2 = RENAME(yuv2rgb555_2);
break;
case PIX_FMT_RGB565:
c->yuv2packed1 = RENAME(yuv2rgb565_1);
+ c->yuv2packed1avg = RENAME(yuv2rgb565_1avg);
c->yuv2packed2 = RENAME(yuv2rgb565_2);
break;
case PIX_FMT_YUYV422:
c->yuv2packed1 = RENAME(yuv2yuyv422_1);
+ c->yuv2packed1avg = RENAME(yuv2yuyv422_1avg);
c->yuv2packed2 = RENAME(yuv2yuyv422_2);
break;
default:
--
1.7.2.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel