[FFmpeg-devel] [PATCH 2/3] swscale: Add p016 output support and generalise yuv420p1x to p010

Philip Langdale Thu, 01 Mar 2018 20:33:42 -0800

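To make the best use of existing code, I generalised the wrapper
that currently converts yuv420p10 to p010 so that it supports any
combination of input and output bit depths between 10 and 16 bits.
As a side effect, this yields a working code path from all
yuv420p1x formats to p01x.

A new interleaved-chroma output function (yuv2p016cX_c) is also
added, and P016LE/BE are flagged as supported in format_entries.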


Signed-off-by: Philip Langdale <phil...@overt.org>
---
 libswscale/output.c           | 31 +++++++++++++++++++++++++++++++
 libswscale/swscale_unscaled.c | 35 +++++++++++++++++++++++++----------
 libswscale/utils.c            |  4 ++--
 3 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index f30bce8dd3..0af2fffea4 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -180,6 +180,34 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
     }
 }
 
+static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+                         const int16_t **chrUSrc, const int16_t **chrVSrc,
+                         uint8_t *dest8, int chrDstW)
+{
+    uint16_t *dest = (uint16_t*)dest8;
+    const int32_t **uSrc = (const int32_t **)chrUSrc;
+    const int32_t **vSrc = (const int32_t **)chrVSrc;
+    int shift = 15;
+    int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
+    int i, j;
+
+    for (i = 0; i < chrDstW; i++) {
+        int u = 1 << (shift - 1);
+        int v = 1 << (shift - 1);
+
+        /* See yuv2planeX_16_c_template for details. */
+        u -= 0x40000000;
+        v -= 0x40000000;
+        for (j = 0; j < chrFilterSize; j++) {
+            u += uSrc[j][i] * (unsigned)chrFilter[j];
+            v += vSrc[j][i] * (unsigned)chrFilter[j];
+        }
+
+        output_pixel(&dest[2*i]  , u, 0x8000, int);
+        output_pixel(&dest[2*i+1], v, 0x8000, int);
+    }
+}
+
 #undef output_pixel
 
 #define output_pixel(pos, val) \
@@ -2257,6 +2285,9 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
     } else if (is16BPS(dstFormat)) {
         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
+        if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE) {
+          *yuv2nv12cX = yuv2p016cX_c;
+        }
     } else if (isNBPS(dstFormat)) {
         if (desc->comp[0].depth == 9) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 5ec2116bcf..766c9b4872 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -180,16 +180,28 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
-static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
+static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[],
                                int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t *dstParam8[],
                                int dstStride[])
 {
+    const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
+    const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
     const uint16_t **src = (const uint16_t**)src8;
     uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
     uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
     int x, y;
 
+    /* Calculate net shift required for values. */
+    const int shift[3] = {
+        dst_format->comp[0].depth + dst_format->comp[0].shift -
+        src_format->comp[0].depth - src_format->comp[0].shift,
+        dst_format->comp[1].depth + dst_format->comp[1].shift -
+        src_format->comp[1].depth - src_format->comp[1].shift,
+        dst_format->comp[2].depth + dst_format->comp[2].shift -
+        src_format->comp[2].depth - src_format->comp[2].shift,
+    };
+
     av_assert0(!(srcStride[0] % 2 || srcStride[1] % 2 || srcStride[2] % 2 ||
                  dstStride[0] % 2 || dstStride[1] % 2));
 
@@ -197,7 +209,7 @@ static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
         uint16_t *tdstY = dstY;
         const uint16_t *tsrc0 = src[0];
         for (x = c->srcW; x > 0; x--) {
-            *tdstY++ = *tsrc0++ << 6;
+            *tdstY++ = *tsrc0++ << shift[0];
         }
         src[0] += srcStride[0] / 2;
         dstY += dstStride[0] / 2;
@@ -207,8 +219,8 @@ static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
             const uint16_t *tsrc1 = src[1];
             const uint16_t *tsrc2 = src[2];
             for (x = c->srcW / 2; x > 0; x--) {
-                *tdstUV++ = *tsrc1++ << 6;
-                *tdstUV++ = *tsrc2++ << 6;
+                *tdstUV++ = *tsrc1++ << shift[1];
+                *tdstUV++ = *tsrc2++ << shift[2];
             }
             src[1] += srcStride[1] / 2;
             src[2] += srcStride[2] / 2;
@@ -1738,14 +1750,17 @@ void ff_get_unscaled_swscale(SwsContext *c)
         !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
         c->swscale = ff_yuv2rgb_get_func_ptr(c);
     }
-    /* yuv420p10_to_p010 */
-    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10) &&
-        dstFormat == AV_PIX_FMT_P010) {
-        c->swscale = planarToP010Wrapper;
+    /* yuv420p1x_to_p01x */
+    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10 ||
+         srcFormat == AV_PIX_FMT_YUV420P12 ||
+         srcFormat == AV_PIX_FMT_YUV420P14 ||
+         srcFormat == AV_PIX_FMT_YUV420P16 || srcFormat == AV_PIX_FMT_YUVA420P16) &&
+        (dstFormat == AV_PIX_FMT_P010 || dstFormat == AV_PIX_FMT_P016)) {
+        c->swscale = planarToP01xWrapper;
     }
-    /* yuv420p_to_p010le */
+    /* yuv420p_to_p01xle */
     if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUVA420P) &&
-        dstFormat == AV_PIX_FMT_P010LE) {
+        (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P016LE)) {
         c->swscale = planar8ToP01xleWrapper;
     }
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 4df09306d3..98a6b99476 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -254,8 +254,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_AYUV64LE]    = { 1, 1},
     [AV_PIX_FMT_P010LE]      = { 1, 1 },
     [AV_PIX_FMT_P010BE]      = { 1, 1 },
-    [AV_PIX_FMT_P016LE]      = { 1, 0 },
-    [AV_PIX_FMT_P016BE]      = { 1, 0 },
+    [AV_PIX_FMT_P016LE]      = { 1, 1 },
+    [AV_PIX_FMT_P016BE]      = { 1, 1 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
-- 
2.14.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 2/3] swscale: Add p016 output support and generalise yuv420p1x to p010

Reply via email to