[FFmpeg-devel] [PATCH] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling (PR #20958)

ddesouza via ffmpeg-devel Tue, 18 Nov 2025 08:30:02 -0800

PR #20958 opened by ddesouza
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958.patch


This is the revised version of PR #20562, updated to address the review comments
from @haasn and @mkver.


>From a14bccc09b9af29672c141a447999fd3d99a1ba1 Mon Sep 17 00:00:00 2001
From: Diego de Souza <[email protected]>
Date: Wed, 12 Nov 2025 20:08:45 +0100
Subject: [PATCH 1/3] avutil/hwcontext_cuda: Expand pixel format support

Add support for additional pixel formats in CUDA hardware context:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
- Semiplanar formats (nv16, p210, p216)

Signed-off-by: Diego de Souza <[email protected]>
---
 libavutil/hwcontext_cuda.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 10d3399537..b0b65b2446 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -50,6 +50,10 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_P016,
     AV_PIX_FMT_P210,
     AV_PIX_FMT_P216,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_YUV444P10,
     AV_PIX_FMT_YUV444P10MSB,
     AV_PIX_FMT_YUV444P12MSB,
     AV_PIX_FMT_YUV444P16,
-- 
2.49.1


>From 9d85f56a1a109a897a40f0993f2c6b13f1b81dcf Mon Sep 17 00:00:00 2001
From: Diego de Souza <[email protected]>
Date: Thu, 13 Nov 2025 09:49:45 +0100
Subject: [PATCH 2/3] avfilter/hwupload_cuda: Expand pixel format support

Add support for uploading additional pixel formats to NVIDIA GPUs:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
- Semiplanar formats (nv16, p210, p216)
- Packed RGB formats (rgb32, bgr32)

Signed-off-by: Diego de Souza <[email protected]>
---
 libavfilter/vf_hwupload_cuda.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c
index b505f8b298..34f959ca50 100644
--- a/libavfilter/vf_hwupload_cuda.c
+++ b/libavfilter/vf_hwupload_cuda.c
@@ -59,9 +59,9 @@ static int cudaupload_query_formats(const AVFilterContext 
*ctx,
     int ret;
 
     static const enum AVPixelFormat input_pix_fmts[] = {
-        AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, 
AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16,
-        AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32,
+        AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, 
AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216, 
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, 
AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_RGB32, 
AV_PIX_FMT_BGR32,
 #if CONFIG_VULKAN
         AV_PIX_FMT_VULKAN,
 #endif
-- 
2.49.1


>From 92d47b6b570e96e8ea234d1cd7bb2dd5b5d289a7 Mon Sep 17 00:00:00 2001
From: Diego de Souza <[email protected]>
Date: Tue, 18 Nov 2025 17:16:43 +0100
Subject: [PATCH 3/3] avfilter/scale_cuda: Add support for 4:2:2 chroma
 subsampling

The supported YUV pixel formats are now grouped into planar and
semi-planar families, one per bit depth. Formats in the same family
share their CUDA kernels, which reduces the number of kernels needed
to cover all pixel formats.

This patch:
1. Adds support for YUV 4:2:2 planar and semi-planar formats:
        yuv422p, yuv422p10, nv16, p210, p216
2. Implements new conversion structures and kernel definitions
        for planar and semi-planar formats

Signed-off-by: Diego de Souza <[email protected]>
---
 libavfilter/vf_scale_cuda.c  |   91 ++-
 libavfilter/vf_scale_cuda.cu | 1151 +++++++++++++++++-----------------
 libavfilter/vf_scale_cuda.h  |   16 +
 3 files changed, 672 insertions(+), 586 deletions(-)

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 88a6e20610..4e66593489 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -39,17 +39,29 @@
 #include "cuda/load_helper.h"
 #include "vf_scale_cuda.h"
 
-static const enum AVPixelFormat supported_formats[] = {
-    AV_PIX_FMT_YUV420P,
-    AV_PIX_FMT_NV12,
-    AV_PIX_FMT_YUV444P,
-    AV_PIX_FMT_P010,
-    AV_PIX_FMT_P016,
-    AV_PIX_FMT_YUV444P16,
-    AV_PIX_FMT_0RGB32,
-    AV_PIX_FMT_0BGR32,
-    AV_PIX_FMT_RGB32,
-    AV_PIX_FMT_BGR32,
+struct format_entry {
+    enum AVPixelFormat format;
+    char name[13];
+};
+
+static const struct format_entry supported_formats[] = {
+    {AV_PIX_FMT_YUV420P,  "planar8"},
+    {AV_PIX_FMT_YUV422P,  "planar8"},
+    {AV_PIX_FMT_YUV444P,  "planar8"},
+    {AV_PIX_FMT_YUV420P10,"planar10"},
+    {AV_PIX_FMT_YUV422P10,"planar10"},
+    {AV_PIX_FMT_YUV444P10,"planar10"},
+    {AV_PIX_FMT_YUV444P16,"planar16"},
+    {AV_PIX_FMT_NV12,     "semiplanar8"},
+    {AV_PIX_FMT_NV16,     "semiplanar8"},
+    {AV_PIX_FMT_P010,     "semiplanar10"},
+    {AV_PIX_FMT_P210,     "semiplanar10"},
+    {AV_PIX_FMT_P016,     "semiplanar16"},
+    {AV_PIX_FMT_P216,     "semiplanar16"},
+    {AV_PIX_FMT_0RGB32,   "bgr0"},
+    {AV_PIX_FMT_0BGR32,   "rgb0"},
+    {AV_PIX_FMT_RGB32,    "bgra"},
+    {AV_PIX_FMT_BGR32,    "rgba"},
 };
 
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -187,11 +199,21 @@ static int format_is_supported(enum AVPixelFormat fmt)
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
-        if (supported_formats[i] == fmt)
+        if (supported_formats[i].format == fmt)
             return 1;
     return 0;
 }
 
+static const char* get_format_name(enum AVPixelFormat fmt)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i].format == fmt)
+            return supported_formats[i].name;
+    return NULL;
+}
+
 static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat 
in_format, enum AVPixelFormat out_format)
 {
     CUDAScaleContext *s = ctx->priv;
@@ -284,8 +306,8 @@ static av_cold int cudascale_load_functions(AVFilterContext 
*ctx)
     char buf[128];
     int ret;
 
-    const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
-    const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
+    const char *in_fmt_name = get_format_name(s->in_fmt);
+    const char *out_fmt_name = get_format_name(s->out_fmt);
 
     const char *function_infix = "";
 
@@ -335,11 +357,13 @@ static av_cold int 
cudascale_load_functions(AVFilterContext *ctx)
         ret = AVERROR(ENOSYS);
         goto fail;
     }
+    av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, 
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
 
     snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, 
in_fmt_name, out_fmt_name);
     ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
     if (ret < 0)
         goto fail;
+    av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, 
av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
 
 fail:
     CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -416,26 +440,35 @@ fail:
 
 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
                               CUtexObject src_tex[4], int src_left, int 
src_top, int src_width, int src_height,
-                              AVFrame *out_frame, int dst_width, int 
dst_height, int dst_pitch)
+                              AVFrame *out_frame, int dst_width, int 
dst_height, int dst_pitch, int color_range)
 {
     CUDAScaleContext *s = ctx->priv;
     CudaFunctions *cu = s->hwctx->internal->cuda_dl;
 
-    CUdeviceptr dst_devptr[4] = {
-        (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
-        (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
+    CUDAScaleKernelParams params = {
+        .src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
+        .dst = {
+            (CUdeviceptr)out_frame->data[0],
+            (CUdeviceptr)out_frame->data[1],
+            (CUdeviceptr)out_frame->data[2],
+            (CUdeviceptr)out_frame->data[3]
+        },
+        .dst_width = dst_width,
+        .dst_height = dst_height,
+        .dst_pitch = dst_pitch,
+        .src_left = src_left,
+        .src_top = src_top,
+        .src_width = src_width,
+        .src_height = src_height,
+        .param = s->param,
+        .color_range = color_range
     };
 
-    void *args_uchar[] = {
-        &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
-        &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
-        &dst_width, &dst_height, &dst_pitch,
-        &src_left, &src_top, &src_width, &src_height, &s->param
-    };
+    void *args[] = { &params };
 
     return CHECK_CU(cu->cuLaunchKernel(func,
                                        DIV_UP(dst_width, BLOCKX), 
DIV_UP(dst_height, BLOCKY), 1,
-                                       BLOCKX, BLOCKY, 1, 0, s->cu_stream, 
args_uchar, NULL));
+                                       BLOCKX, BLOCKY, 1, 0, s->cu_stream, 
args, NULL));
 }
 
 static int scalecuda_resize(AVFilterContext *ctx,
@@ -486,10 +519,14 @@ static int scalecuda_resize(AVFilterContext *ctx,
             goto exit;
     }
 
+    // Normalize color range: treat AVCOL_RANGE_UNSPECIFIED as limited range 
(MPEG)
+    // This follows the convention used in other FFmpeg filters (e.g., 
vf_zscale)
+    int color_range = (in->color_range == AVCOL_RANGE_UNSPECIFIED) ? 
AVCOL_RANGE_MPEG : in->color_range;
+
     // scale primary plane(s). Usually Y (and A), or single plane of RGB 
frames.
     ret = call_resize_kernel(ctx, s->cu_func,
                              tex, in->crop_left, in->crop_top, crop_width, 
crop_height,
-                             out, out->width, out->height, out->linesize[0]);
+                             out, out->width, out->height, out->linesize[0], 
color_range);
     if (ret < 0)
         goto exit;
 
@@ -503,7 +540,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
                                  out,
                                  AV_CEIL_RSHIFT(out->width, 
s->out_desc->log2_chroma_w),
                                  AV_CEIL_RSHIFT(out->height, 
s->out_desc->log2_chroma_h),
-                                 out->linesize[1]);
+                                 out->linesize[1], color_range);
         if (ret < 0)
             goto exit;
     }
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 271b55cd5d..ed5432a53f 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -35,9 +35,18 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, 
int xo, int yo,
 static const ushort mask_10bit = 0xFFC0;
 static const ushort mask_16bit = 0xFFFF;
 
-static inline __device__ ushort conv_8to16(uchar in, ushort mask)
+static inline __device__ ushort conv_8to16(uchar in, ushort mask, int 
color_range)
 {
-    return ((ushort)in | ((ushort)in << 8)) & mask;
+    ushort shifted = (ushort)in << 8;
+    // AVCOL_RANGE_MPEG = 1 (limited range)
+    return (color_range == 1) ? shifted : ((shifted | ((ushort)in )) & mask);
+}
+
+static inline __device__ ushort conv_8to10pl(uchar in, int color_range)
+{
+    ushort shifted = (ushort)in << 2;
+    // AVCOL_RANGE_MPEG = 1 (limited range)
+    return (color_range == 1) ? shifted : (shifted | ((ushort)in >> 6));
 }
 
 static inline __device__ uchar conv_16to8(ushort in)
@@ -50,9 +59,23 @@ static inline __device__ uchar conv_10to8(ushort in)
     return in >> 8;
 }
 
-static inline __device__ ushort conv_10to16(ushort in)
+static inline __device__ uchar conv_10to8pl(ushort in)
 {
-    return in | (in >> 10);
+    return in >> 2;
+}
+
+static inline __device__ ushort conv_10to16(ushort in, int color_range)
+{
+    ushort shifted = (in >> 10);
+    // AVCOL_RANGE_MPEG = 1 (limited range)
+    return (color_range == 1) ? in : (in | shifted);
+}
+
+static inline __device__ ushort conv_10to16pl(ushort in, int color_range)
+{
+    ushort shifted = (in << 6);
+    // AVCOL_RANGE_MPEG = 1 (limited range)
+    return (color_range == 1) ? shifted : (shifted | (in >> 4));
 }
 
 static inline __device__ ushort conv_16to10(ushort in)
@@ -60,12 +83,18 @@ static inline __device__ ushort conv_16to10(ushort in)
     return in & mask_10bit;
 }
 
+static inline __device__ ushort conv_16to10pl(ushort in)
+{
+    return in >> 6;
+}
+
 #define DEF_F(N, T) \
     template<subsample_function_t<in_T> subsample_func_y,                      
                \
              subsample_function_t<in_T_uv> subsample_func_uv>                  
                \
     __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], 
int xo, int yo, \
                                     int dst_width, int dst_height, int 
dst_pitch,              \
-                                    int src_left, int src_top, int src_width, 
int src_height, float param)
+                                    int src_left, int src_top, int src_width, 
int src_height,  \
+                                    float param, int color_range)
 
 #define SUB_F(m, plane) \
     subsample_func_##m(src_tex[plane], xo, yo, \
@@ -81,9 +110,9 @@ static inline __device__ ushort conv_16to10(ushort in)
 #define DEFAULT_DST(n) \
     dst[n][yo*FIXED_PITCH+xo]
 
-// yuv420p->X
+// planar8->X
 
-struct Convert_yuv420p_yuv420p
+struct Convert_planar8_planar8
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
@@ -103,7 +132,47 @@ struct Convert_yuv420p_yuv420p
     }
 };
 
-struct Convert_yuv420p_nv12
+struct Convert_planar8_planar10
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1), color_range);
+        DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2), color_range);
+    }
+};
+
+struct Convert_planar8_planar16
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit, color_range);
+        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit, color_range);
+    }
+};
+
+struct Convert_planar8_semiplanar8
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
@@ -125,14 +194,82 @@ struct Convert_yuv420p_nv12
     }
 };
 
-struct Convert_yuv420p_yuv444p
+struct Convert_planar8_semiplanar10
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
     typedef uchar in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(SUB_F(uv, 1), mask_10bit, color_range),
+            conv_8to16(SUB_F(uv, 2), mask_10bit, color_range)
+        );
+    }
+};
+
+struct Convert_planar8_semiplanar16
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(SUB_F(uv, 1), mask_16bit, color_range),
+            conv_8to16(SUB_F(uv, 2), mask_16bit, color_range)
+        );
+    }
+};
+
+
+
+// planar10->X
+
+struct Convert_planar10_planar8
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2));
+    }
+};
+
+struct Convert_planar10_planar10
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
     DEF_F(Convert, out_T)
     {
         DEFAULT_DST(0) = SUB_F(y, 0);
@@ -145,249 +282,227 @@ struct Convert_yuv420p_yuv444p
     }
 };
 
-struct Convert_yuv420p_p010le
+struct Convert_planar10_planar16
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_10bit),
-            conv_8to16(SUB_F(uv, 2), mask_10bit)
-        );
-    }
-};
-
-struct Convert_yuv420p_p016le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_16bit),
-            conv_8to16(SUB_F(uv, 2), mask_16bit)
-        );
-    }
-};
-
-struct Convert_yuv420p_yuv444p16le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef ushort out_T;
     typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+        DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1), color_range);
+        DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2), color_range);
     }
 };
 
-// nv12->X
-
-struct Convert_nv12_yuv420p
+struct Convert_planar10_semiplanar8
 {
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
-    }
-};
-
-struct Convert_nv12_nv12
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
     typedef uchar out_T;
     typedef uchar2 out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-    }
-};
-
-struct Convert_nv12_yuv444p
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
-    }
-};
-
-struct Convert_nv12_p010le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(res.x, mask_10bit),
-            conv_8to16(res.y, mask_10bit)
-        );
-    }
-};
-
-struct Convert_nv12_p016le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(res.x, mask_16bit),
-            conv_8to16(res.y, mask_16bit)
-        );
-    }
-};
-
-struct Convert_nv12_yuv444p16le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit);
-    }
-};
-
-// yuv444p->X
-
-struct Convert_yuv444p_yuv420p
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
-    }
-};
-
-struct Convert_yuv444p_nv12
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
+        DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         DEFAULT_DST(1) = make_uchar2(
+            conv_10to8pl(SUB_F(uv, 1)),
+            conv_10to8pl(SUB_F(uv, 2))
+        );
+    }
+};
+
+struct Convert_planar10_semiplanar10
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = (SUB_F(y, 0) << 6);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
+            (SUB_F(uv, 1) << 6),
+            (SUB_F(uv, 2) << 6)
+        );
+    }
+};
+
+struct Convert_planar10_semiplanar16
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
+            conv_10to16pl(SUB_F(uv, 1), color_range),
+            conv_10to16pl(SUB_F(uv, 2), color_range)
+        );
+    }
+};
+
+// planar16->X
+
+struct Convert_planar16_planar8
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef uchar out_T;
+    typedef uchar out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+    }
+};
+
+struct Convert_planar16_planar10
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1));
+        DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2));
+    }
+};
+
+struct Convert_planar16_planar16
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = SUB_F(y, 0);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = SUB_F(uv, 1);
+        DEFAULT_DST(2) = SUB_F(uv, 2);
+    }
+};
+
+struct Convert_planar16_semiplanar8
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_uchar2(
+            conv_16to8(SUB_F(uv, 1)),
+            conv_16to8(SUB_F(uv, 2))
+        );
+    }
+};
+
+struct Convert_planar16_semiplanar10
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
+            conv_16to10(SUB_F(uv, 1)),
+            conv_16to10(SUB_F(uv, 2))
+        );
+    }
+};
+
+struct Convert_planar16_semiplanar16
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = SUB_F(y, 0);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = make_ushort2(
             SUB_F(uv, 1),
             SUB_F(uv, 2)
         );
     }
 };
 
-struct Convert_yuv444p_yuv444p
+// semiplanar8->X
+
+struct Convert_semiplanar8_planar8
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
-    typedef uchar in_T_uv;
+    typedef uchar2 in_T_uv;
     typedef uchar out_T;
     typedef uchar out_T_uv;
 
@@ -398,78 +513,122 @@ struct Convert_yuv444p_yuv444p
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = res.x;
+        DEFAULT_DST(2) = res.y;
     }
 };
 
-struct Convert_yuv444p_p010le
+struct Convert_semiplanar8_planar10
 {
     static const int in_bit_depth = 8;
     typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_10bit),
-            conv_8to16(SUB_F(uv, 2), mask_10bit)
-        );
-    }
-};
-
-struct Convert_yuv444p_p016le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_8to16(SUB_F(uv, 1), mask_16bit),
-            conv_8to16(SUB_F(uv, 2), mask_16bit)
-        );
-    }
-};
-
-struct Convert_yuv444p_yuv444p16le
-{
-    static const int in_bit_depth = 8;
-    typedef uchar in_T;
-    typedef uchar in_T_uv;
+    typedef uchar2 in_T_uv;
     typedef ushort out_T;
     typedef ushort out_T_uv;
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+        DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
-        DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
-        DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_8to10pl(res.x, color_range);
+        DEFAULT_DST(2) = conv_8to10pl(res.y, color_range);
     }
 };
 
-// p010le->X
+struct Convert_semiplanar8_planar16
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
 
-struct Convert_p010le_yuv420p
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit, color_range);
+        DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit, color_range);
+    }
+};
+
+struct Convert_semiplanar8_semiplanar8
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef uchar out_T;
+    typedef uchar2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = SUB_F(y, 0);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        DEFAULT_DST(1) = SUB_F(uv, 1);
+    }
+};
+
+struct Convert_semiplanar8_semiplanar10
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(res.x, mask_10bit, color_range),
+            conv_8to16(res.y, mask_10bit, color_range)
+        );
+    }
+};
+
+struct Convert_semiplanar8_semiplanar16
+{
+    static const int in_bit_depth = 8;
+    typedef uchar in_T;
+    typedef uchar2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort2 out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = make_ushort2(
+            conv_8to16(res.x, mask_16bit, color_range),
+            conv_8to16(res.y, mask_16bit, color_range)
+        );
+    }
+};
+
+// semiplanar10->X
+
+struct Convert_semiplanar10_planar8
 {
     static const int in_bit_depth = 10;
     typedef ushort in_T;
@@ -490,7 +649,49 @@ struct Convert_p010le_yuv420p
     }
 };
 
-struct Convert_p010le_nv12
+struct Convert_semiplanar10_planar10
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = SUB_F(y, 0) >> 6;
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = res.x >> 6;
+        DEFAULT_DST(2) = res.y >> 6;
+    }
+};
+
+struct Convert_semiplanar10_planar16
+{
+    static const int in_bit_depth = 10;
+    typedef ushort in_T;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_10to16(res.x, color_range);
+        DEFAULT_DST(2) = conv_10to16(res.y, color_range);
+    }
+};
+
+struct Convert_semiplanar10_semiplanar8
 {
     static const int in_bit_depth = 10;
     typedef ushort in_T;
@@ -513,28 +714,7 @@ struct Convert_p010le_nv12
     }
 };
 
-struct Convert_p010le_yuv444p
-{
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_10to8(res.x);
-        DEFAULT_DST(2) = conv_10to8(res.y);
-    }
-};
-
-struct Convert_p010le_p010le
+struct Convert_semiplanar10_semiplanar10
 {
     static const int in_bit_depth = 10;
     typedef ushort in_T;
@@ -553,7 +733,7 @@ struct Convert_p010le_p010le
     }
 };
 
-struct Convert_p010le_p016le
+struct Convert_semiplanar10_semiplanar16
 {
     static const int in_bit_depth = 10;
     typedef ushort in_T;
@@ -563,43 +743,23 @@ struct Convert_p010le_p016le
 
     DEF_F(Convert, out_T)
     {
-        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range);
     }
 
     DEF_F(Convert_uv, out_T_uv)
     {
         in_T_uv res = SUB_F(uv, 1);
         DEFAULT_DST(1) = make_ushort2(
-            conv_10to16(res.x),
-            conv_10to16(res.y)
+            conv_10to16(res.x, color_range),
+            conv_10to16(res.y, color_range)
         );
     }
 };
 
-struct Convert_p010le_yuv444p16le
-{
-    static const int in_bit_depth = 10;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort out_T_uv;
 
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
-    }
+// semiplanar16->X
 
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_10to16(res.x);
-        DEFAULT_DST(2) = conv_10to16(res.y);
-    }
-};
-
-// p016le->X
-
-struct Convert_p016le_yuv420p
+struct Convert_semiplanar16_planar8
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
@@ -620,7 +780,49 @@ struct Convert_p016le_yuv420p
     }
 };
 
-struct Convert_p016le_nv12
+struct Convert_semiplanar16_planar10
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = conv_16to10pl(res.x);
+        DEFAULT_DST(2) = conv_16to10pl(res.y);
+    }
+};
+
+struct Convert_semiplanar16_planar16
+{
+    static const int in_bit_depth = 16;
+    typedef ushort in_T;
+    typedef ushort2 in_T_uv;
+    typedef ushort out_T;
+    typedef ushort out_T_uv;
+
+    DEF_F(Convert, out_T)
+    {
+        DEFAULT_DST(0) = SUB_F(y, 0);
+    }
+
+    DEF_F(Convert_uv, out_T_uv)
+    {
+        in_T_uv res = SUB_F(uv, 1);
+        DEFAULT_DST(1) = res.x;
+        DEFAULT_DST(2) = res.y;
+    }
+};
+
+struct Convert_semiplanar16_semiplanar8
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
@@ -643,28 +845,7 @@ struct Convert_p016le_nv12
     }
 };
 
-struct Convert_p016le_yuv444p
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = conv_16to8(res.x);
-        DEFAULT_DST(2) = conv_16to8(res.y);
-    }
-};
-
-struct Convert_p016le_p010le
+struct Convert_semiplanar16_semiplanar10
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
@@ -687,7 +868,7 @@ struct Convert_p016le_p010le
     }
 };
 
-struct Convert_p016le_p016le
+struct Convert_semiplanar16_semiplanar16
 {
     static const int in_bit_depth = 16;
     typedef ushort in_T;
@@ -706,155 +887,6 @@ struct Convert_p016le_p016le
     }
 };
 
-struct Convert_p016le_yuv444p16le
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort2 in_T_uv;
-    typedef ushort out_T;
-    typedef ushort out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        in_T_uv res = SUB_F(uv, 1);
-        DEFAULT_DST(1) = res.x;
-        DEFAULT_DST(2) = res.y;
-    }
-};
-
-// yuv444p16le->X
-
-struct Convert_yuv444p16le_yuv420p
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
-        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
-    }
-};
-
-struct Convert_yuv444p16le_nv12
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef uchar out_T;
-    typedef uchar2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_uchar2(
-            conv_16to8(SUB_F(uv, 1)),
-            conv_16to8(SUB_F(uv, 2))
-        );
-    }
-};
-
-struct Convert_yuv444p16le_yuv444p
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef uchar out_T;
-    typedef uchar out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
-        DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
-    }
-};
-
-struct Convert_yuv444p16le_p010le
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            conv_16to10(SUB_F(uv, 1)),
-            conv_16to10(SUB_F(uv, 2))
-        );
-    }
-};
-
-struct Convert_yuv444p16le_p016le
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef ushort out_T;
-    typedef ushort2 out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = make_ushort2(
-            SUB_F(uv, 1),
-            SUB_F(uv, 2)
-        );
-    }
-};
-
-struct Convert_yuv444p16le_yuv444p16le
-{
-    static const int in_bit_depth = 16;
-    typedef ushort in_T;
-    typedef ushort in_T_uv;
-    typedef ushort out_T;
-    typedef ushort out_T_uv;
-
-    DEF_F(Convert, out_T)
-    {
-        DEFAULT_DST(0) = SUB_F(y, 0);
-    }
-
-    DEF_F(Convert_uv, out_T_uv)
-    {
-        DEFAULT_DST(1) = SUB_F(uv, 1);
-        DEFAULT_DST(2) = SUB_F(uv, 2);
-    }
-};
-
 #define DEF_CONVERT_IDENTITY(fmt1, fmt2)\
                                         \
 struct Convert_##fmt1##_##fmt2          \
@@ -930,7 +962,7 @@ struct Convert_bgr0_bgra
             res.x,
             res.y,
             res.z,
-            1
+            0xFF
         );
     }
 
@@ -954,7 +986,7 @@ struct Convert_bgr0_rgba
             res.z,
             res.y,
             res.x,
-            1
+            0xFF
         );
     }
 
@@ -978,7 +1010,7 @@ struct Convert_rgb0_bgra
             res.z,
             res.y,
             res.x,
-            1
+            0xFF
         );
     }
 
@@ -1002,7 +1034,7 @@ struct Convert_rgb0_rgba
             res.x,
             res.y,
             res.z,
-            1
+            0xFF
         );
     }
 
@@ -1147,25 +1179,26 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
 
 /// --- FUNCTION EXPORTS ---
 
-#define KERNEL_ARGS(T) \
-    cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \
-    cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
-    T *dst_0, T *dst_1, T *dst_2, T *dst_3,                       \
-    int dst_width, int dst_height, int dst_pitch,                 \
-    int src_left, int src_top, int src_width, int src_height, float param
+#define KERNEL_ARGS(T) CUDAScaleKernelParams params
 
 #define SUBSAMPLE(Convert, T) \
-    cudaTextureObject_t src_tex[4] =                    \
-        { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \
-    T *dst[4] = { dst_0, dst_1, dst_2, dst_3 };         \
+    cudaTextureObject_t src_tex[4] = {                  \
+        params.src_tex[0], params.src_tex[1],           \
+        params.src_tex[2], params.src_tex[3]            \
+    };                                                  \
+    T *dst[4] = {                                       \
+        (T*)params.dst[0], (T*)params.dst[1],           \
+        (T*)params.dst[2], (T*)params.dst[3]            \
+    };                                                  \
     int xo = blockIdx.x * blockDim.x + threadIdx.x;     \
     int yo = blockIdx.y * blockDim.y + threadIdx.y;     \
-    if (yo >= dst_height || xo >= dst_width) return;    \
+    if (yo >= params.dst_height || xo >= params.dst_width) return; \
     Convert(                                            \
         src_tex, dst, xo, yo,                           \
-        dst_width, dst_height, dst_pitch,               \
-        src_left, src_top,                              \
-        src_width, src_height, param);
+        params.dst_width, params.dst_height, params.dst_pitch, \
+        params.src_left, params.src_top,                \
+        params.src_width, params.src_height,            \
+        params.param, params.color_range);
 
 extern "C" {
 
@@ -1184,12 +1217,12 @@ extern "C" {
     NEAREST_KERNEL(C,_uv)
 
 #define NEAREST_KERNELS(C) \
-    NEAREST_KERNEL_RAW(yuv420p_ ## C)     \
-    NEAREST_KERNEL_RAW(nv12_ ## C)        \
-    NEAREST_KERNEL_RAW(yuv444p_ ## C)     \
-    NEAREST_KERNEL_RAW(p010le_ ## C)      \
-    NEAREST_KERNEL_RAW(p016le_ ## C)      \
-    NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
+    NEAREST_KERNEL_RAW(planar8_ ## C)      \
+    NEAREST_KERNEL_RAW(planar10_ ## C)     \
+    NEAREST_KERNEL_RAW(planar16_ ## C)     \
+    NEAREST_KERNEL_RAW(semiplanar8_ ## C)  \
+    NEAREST_KERNEL_RAW(semiplanar10_ ## C) \
+    NEAREST_KERNEL_RAW(semiplanar16_ ## C)
 
 #define NEAREST_KERNELS_RGB(C) \
     NEAREST_KERNEL_RAW(rgb0_ ## C)  \
@@ -1197,12 +1230,12 @@ extern "C" {
     NEAREST_KERNEL_RAW(rgba_ ## C)  \
     NEAREST_KERNEL_RAW(bgra_ ## C)  \
 
-NEAREST_KERNELS(yuv420p)
-NEAREST_KERNELS(nv12)
-NEAREST_KERNELS(yuv444p)
-NEAREST_KERNELS(p010le)
-NEAREST_KERNELS(p016le)
-NEAREST_KERNELS(yuv444p16le)
+NEAREST_KERNELS(planar8)
+NEAREST_KERNELS(planar10)
+NEAREST_KERNELS(planar16)
+NEAREST_KERNELS(semiplanar8)
+NEAREST_KERNELS(semiplanar10)
+NEAREST_KERNELS(semiplanar16)
 
 NEAREST_KERNELS_RGB(rgb0)
 NEAREST_KERNELS_RGB(bgr0)
@@ -1224,12 +1257,12 @@ NEAREST_KERNELS_RGB(bgra)
     BILINEAR_KERNEL(C,_uv)
 
 #define BILINEAR_KERNELS(C) \
-    BILINEAR_KERNEL_RAW(yuv420p_ ## C)     \
-    BILINEAR_KERNEL_RAW(nv12_ ## C)        \
-    BILINEAR_KERNEL_RAW(yuv444p_ ## C)     \
-    BILINEAR_KERNEL_RAW(p010le_ ## C)      \
-    BILINEAR_KERNEL_RAW(p016le_ ## C)      \
-    BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
+    BILINEAR_KERNEL_RAW(planar8_ ## C)      \
+    BILINEAR_KERNEL_RAW(planar10_ ## C)     \
+    BILINEAR_KERNEL_RAW(planar16_ ## C)     \
+    BILINEAR_KERNEL_RAW(semiplanar8_ ## C)  \
+    BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \
+    BILINEAR_KERNEL_RAW(semiplanar16_ ## C)
 
 #define BILINEAR_KERNELS_RGB(C)     \
     BILINEAR_KERNEL_RAW(rgb0_ ## C) \
@@ -1237,12 +1270,12 @@ NEAREST_KERNELS_RGB(bgra)
     BILINEAR_KERNEL_RAW(rgba_ ## C) \
     BILINEAR_KERNEL_RAW(bgra_ ## C)
 
-BILINEAR_KERNELS(yuv420p)
-BILINEAR_KERNELS(nv12)
-BILINEAR_KERNELS(yuv444p)
-BILINEAR_KERNELS(p010le)
-BILINEAR_KERNELS(p016le)
-BILINEAR_KERNELS(yuv444p16le)
+BILINEAR_KERNELS(planar8)
+BILINEAR_KERNELS(planar10)
+BILINEAR_KERNELS(planar16)
+BILINEAR_KERNELS(semiplanar8)
+BILINEAR_KERNELS(semiplanar10)
+BILINEAR_KERNELS(semiplanar16)
 
 BILINEAR_KERNELS_RGB(rgb0)
 BILINEAR_KERNELS_RGB(bgr0)
@@ -1264,12 +1297,12 @@ BILINEAR_KERNELS_RGB(bgra)
     BICUBIC_KERNEL(C,_uv)
 
 #define BICUBIC_KERNELS(C) \
-    BICUBIC_KERNEL_RAW(yuv420p_ ## C)     \
-    BICUBIC_KERNEL_RAW(nv12_ ## C)        \
-    BICUBIC_KERNEL_RAW(yuv444p_ ## C)     \
-    BICUBIC_KERNEL_RAW(p010le_ ## C)      \
-    BICUBIC_KERNEL_RAW(p016le_ ## C)      \
-    BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
+    BICUBIC_KERNEL_RAW(planar8_ ## C)      \
+    BICUBIC_KERNEL_RAW(planar10_ ## C)     \
+    BICUBIC_KERNEL_RAW(planar16_ ## C)     \
+    BICUBIC_KERNEL_RAW(semiplanar8_ ## C)  \
+    BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \
+    BICUBIC_KERNEL_RAW(semiplanar16_ ## C)
 
 #define BICUBIC_KERNELS_RGB(C)      \
     BICUBIC_KERNEL_RAW(rgb0_ ## C)  \
@@ -1277,12 +1310,12 @@ BILINEAR_KERNELS_RGB(bgra)
     BICUBIC_KERNEL_RAW(rgba_ ## C)  \
     BICUBIC_KERNEL_RAW(bgra_ ## C)
 
-BICUBIC_KERNELS(yuv420p)
-BICUBIC_KERNELS(nv12)
-BICUBIC_KERNELS(yuv444p)
-BICUBIC_KERNELS(p010le)
-BICUBIC_KERNELS(p016le)
-BICUBIC_KERNELS(yuv444p16le)
+BICUBIC_KERNELS(planar8)
+BICUBIC_KERNELS(planar10)
+BICUBIC_KERNELS(planar16)
+BICUBIC_KERNELS(semiplanar8)
+BICUBIC_KERNELS(semiplanar10)
+BICUBIC_KERNELS(semiplanar16)
 
 BICUBIC_KERNELS_RGB(rgb0)
 BICUBIC_KERNELS_RGB(bgr0)
@@ -1304,12 +1337,12 @@ BICUBIC_KERNELS_RGB(bgra)
     LANCZOS_KERNEL(C,_uv)
 
 #define LANCZOS_KERNELS(C) \
-    LANCZOS_KERNEL_RAW(yuv420p_ ## C)     \
-    LANCZOS_KERNEL_RAW(nv12_ ## C)        \
-    LANCZOS_KERNEL_RAW(yuv444p_ ## C)     \
-    LANCZOS_KERNEL_RAW(p010le_ ## C)      \
-    LANCZOS_KERNEL_RAW(p016le_ ## C)      \
-    LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
+    LANCZOS_KERNEL_RAW(planar8_ ## C)      \
+    LANCZOS_KERNEL_RAW(planar10_ ## C)     \
+    LANCZOS_KERNEL_RAW(planar16_ ## C)     \
+    LANCZOS_KERNEL_RAW(semiplanar8_ ## C)  \
+    LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \
+    LANCZOS_KERNEL_RAW(semiplanar16_ ## C)
 
 #define LANCZOS_KERNELS_RGB(C)      \
     LANCZOS_KERNEL_RAW(rgb0_ ## C)  \
@@ -1317,12 +1350,12 @@ BICUBIC_KERNELS_RGB(bgra)
     LANCZOS_KERNEL_RAW(rgba_ ## C)  \
     LANCZOS_KERNEL_RAW(bgra_ ## C)
 
-LANCZOS_KERNELS(yuv420p)
-LANCZOS_KERNELS(nv12)
-LANCZOS_KERNELS(yuv444p)
-LANCZOS_KERNELS(p010le)
-LANCZOS_KERNELS(p016le)
-LANCZOS_KERNELS(yuv444p16le)
+LANCZOS_KERNELS(planar8)
+LANCZOS_KERNELS(planar10)
+LANCZOS_KERNELS(planar16)
+LANCZOS_KERNELS(semiplanar8)
+LANCZOS_KERNELS(semiplanar10)
+LANCZOS_KERNELS(semiplanar16)
 
 LANCZOS_KERNELS_RGB(rgb0)
 LANCZOS_KERNELS_RGB(bgr0)
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
index 40d5b9cfac..1fb3498ee8 100644
--- a/libavfilter/vf_scale_cuda.h
+++ b/libavfilter/vf_scale_cuda.h
@@ -23,6 +23,22 @@
 #ifndef AVFILTER_SCALE_CUDA_H
 #define AVFILTER_SCALE_CUDA_H
 
+#include <ffnvcodec/dynlink_cuda.h>
+
 #define SCALE_CUDA_PARAM_DEFAULT 999999.0f
 
+typedef struct __attribute__((aligned(16))) {
+    CUtexObject src_tex[4];
+    CUdeviceptr dst[4];
+    int dst_width;
+    int dst_height;
+    int dst_pitch;
+    int src_left;
+    int src_top;
+    int src_width;
+    int src_height;
+    float param;
+    int color_range;
+} CUDAScaleKernelParams;
+
 #endif
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling (PR #20958)

Reply via email to