PR #20958 opened by ddesouza URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958.patch
This is the revised version of PR #20562, according to the comments from @haasn and @mkver. >From a14bccc09b9af29672c141a447999fd3d99a1ba1 Mon Sep 17 00:00:00 2001 From: Diego de Souza <[email protected]> Date: Wed, 12 Nov 2025 20:08:45 +0100 Subject: [PATCH 1/3] avutil/hwcontext_cuda: Expands pixel formats support Add support for additional pixel formats in CUDA hardware context: - Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10) - Semiplanar formats (nv16, p210, p216) Signed-off-by: Diego de Souza <[email protected]> --- libavutil/hwcontext_cuda.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 10d3399537..b0b65b2446 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -50,6 +50,10 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216, + AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV420P10, + AV_PIX_FMT_YUV422P10, + AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P10MSB, AV_PIX_FMT_YUV444P12MSB, AV_PIX_FMT_YUV444P16, -- 2.49.1 >From 9d85f56a1a109a897a40f0993f2c6b13f1b81dcf Mon Sep 17 00:00:00 2001 From: Diego de Souza <[email protected]> Date: Thu, 13 Nov 2025 09:49:45 +0100 Subject: [PATCH 2/3] avfilter/hwupload_cuda: Expands pixel formats support Add support for uploading additional pixel formats to NVIDIA GPUs: - Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10) - Semiplanar formats (nv16, p210, p216) Signed-off-by: Diego de Souza <[email protected]> --- libavfilter/vf_hwupload_cuda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c index b505f8b298..34f959ca50 100644 --- a/libavfilter/vf_hwupload_cuda.c +++ b/libavfilter/vf_hwupload_cuda.c @@ -59,9 +59,9 @@ static int cudaupload_query_formats(const AVFilterContext *ctx, int ret; static const enum AVPixelFormat input_pix_fmts[] = { - AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV444P, - AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16, - AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, + AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, + AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P16, + AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_RGB32, AV_PIX_FMT_BGR32, #if CONFIG_VULKAN AV_PIX_FMT_VULKAN, #endif -- 2.49.1 >From 92d47b6b570e96e8ea234d1cd7bb2dd5b5d289a7 Mon Sep 17 00:00:00 2001 From: Diego de Souza <[email protected]> Date: Tue, 18 Nov 2025 17:16:43 +0100 Subject: [PATCH 3/3] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling The supported YUV pixel formats were separated between planar and semiplanar. This approach reduces the number of CUDA kernels for all pixel formats. This patch: 1. Adds support for YUV 4:2:2 planar and semi-planar formats: yuv422p, yuv422p10, nv16, p210, p216 2. Implements new conversion structures and kernel definitions for planar and semi-planar formats Signed-off-by: Diego de Souza <[email protected]> --- libavfilter/vf_scale_cuda.c | 91 ++- libavfilter/vf_scale_cuda.cu | 1151 +++++++++++++++++----------------- libavfilter/vf_scale_cuda.h | 16 + 3 files changed, 672 insertions(+), 586 deletions(-) diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 88a6e20610..4e66593489 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -39,17 +39,29 @@ #include "cuda/load_helper.h" #include "vf_scale_cuda.h" -static const enum AVPixelFormat supported_formats[] = { - AV_PIX_FMT_YUV420P, - AV_PIX_FMT_NV12, - AV_PIX_FMT_YUV444P, - AV_PIX_FMT_P010, - AV_PIX_FMT_P016, - AV_PIX_FMT_YUV444P16, - AV_PIX_FMT_0RGB32, - AV_PIX_FMT_0BGR32, - AV_PIX_FMT_RGB32, - AV_PIX_FMT_BGR32, +struct format_entry { + enum AVPixelFormat format; + char name[13]; +}; + +static const struct format_entry supported_formats[] = { + {AV_PIX_FMT_YUV420P, "planar8"}, + {AV_PIX_FMT_YUV422P, "planar8"}, + {AV_PIX_FMT_YUV444P, "planar8"}, + {AV_PIX_FMT_YUV420P10,"planar10"}, + {AV_PIX_FMT_YUV422P10,"planar10"}, + {AV_PIX_FMT_YUV444P10,"planar10"}, + {AV_PIX_FMT_YUV444P16,"planar16"}, + {AV_PIX_FMT_NV12, "semiplanar8"}, + {AV_PIX_FMT_NV16, "semiplanar8"}, + {AV_PIX_FMT_P010, "semiplanar10"}, + {AV_PIX_FMT_P210, "semiplanar10"}, + {AV_PIX_FMT_P016, "semiplanar16"}, + {AV_PIX_FMT_P216, "semiplanar16"}, + {AV_PIX_FMT_0RGB32, "bgr0"}, + {AV_PIX_FMT_0BGR32, "rgb0"}, + {AV_PIX_FMT_RGB32, "bgra"}, + {AV_PIX_FMT_BGR32, "rgba"}, }; #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) @@ -187,11 +199,21 @@ static int format_is_supported(enum AVPixelFormat fmt) int i; for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) - if (supported_formats[i] == fmt) + if (supported_formats[i].format == fmt) return 1; return 0; } +static const char* get_format_name(enum AVPixelFormat fmt) +{ + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) + if (supported_formats[i].format == fmt) + return supported_formats[i].name; + return NULL; +} + static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format) { CUDAScaleContext *s = ctx->priv; @@ -284,8 +306,8 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx) char buf[128]; int ret; - const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt); - const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt); + const char *in_fmt_name = get_format_name(s->in_fmt); + const char *out_fmt_name = get_format_name(s->out_fmt); const char *function_infix = ""; @@ -335,11 +357,13 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx) ret = AVERROR(ENOSYS); goto fail; } + av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt)); snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf)); if (ret < 0) goto fail; + av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt)); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); @@ -416,26 +440,35 @@ fail: static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height, - AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch) + AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch, int color_range) { CUDAScaleContext *s = ctx->priv; CudaFunctions *cu = s->hwctx->internal->cuda_dl; - CUdeviceptr dst_devptr[4] = { - (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1], - (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3] + CUDAScaleKernelParams params = { + .src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]}, + .dst = { + (CUdeviceptr)out_frame->data[0], + (CUdeviceptr)out_frame->data[1], + (CUdeviceptr)out_frame->data[2], + (CUdeviceptr)out_frame->data[3] + }, + .dst_width = dst_width, + .dst_height = dst_height, + .dst_pitch = dst_pitch, + .src_left = src_left, + .src_top = src_top, + .src_width = src_width, + .src_height = src_height, + .param = s->param, + .color_range = color_range }; - void *args_uchar[] = { - &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3], - &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3], - &dst_width, &dst_height, &dst_pitch, - &src_left, &src_top, &src_width, &src_height, &s->param - }; + void *args[] = { ¶ms }; return CHECK_CU(cu->cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, - BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); + BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL)); } static int scalecuda_resize(AVFilterContext *ctx, @@ -486,10 +519,14 @@ static int scalecuda_resize(AVFilterContext *ctx, goto exit; } + // Normalize color range: treat AVCOL_RANGE_UNSPECIFIED as limited range (MPEG) + // This follows the convention used in other FFmpeg filters (e.g., vf_zscale) + int color_range = (in->color_range == AVCOL_RANGE_UNSPECIFIED) ? AVCOL_RANGE_MPEG : in->color_range; + // scale primary plane(s). Usually Y (and A), or single plane of RGB frames. ret = call_resize_kernel(ctx, s->cu_func, tex, in->crop_left, in->crop_top, crop_width, crop_height, - out, out->width, out->height, out->linesize[0]); + out, out->width, out->height, out->linesize[0], color_range); if (ret < 0) goto exit; @@ -503,7 +540,7 @@ static int scalecuda_resize(AVFilterContext *ctx, out, AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), - out->linesize[1]); + out->linesize[1], color_range); if (ret < 0) goto exit; } diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index 271b55cd5d..ed5432a53f 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -35,9 +35,18 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo, static const ushort mask_10bit = 0xFFC0; static const ushort mask_16bit = 0xFFFF; -static inline __device__ ushort conv_8to16(uchar in, ushort mask) +static inline __device__ ushort conv_8to16(uchar in, ushort mask, int color_range) { - return ((ushort)in | ((ushort)in << 8)) & mask; + ushort shifted = (ushort)in << 8; + // AVCOL_RANGE_MPEG = 1 (limited range) + return (color_range == 1) ? shifted : ((shifted | ((ushort)in )) & mask); +} + +static inline __device__ ushort conv_8to10pl(uchar in, int color_range) +{ + ushort shifted = (ushort)in << 2; + // AVCOL_RANGE_MPEG = 1 (limited range) + return (color_range == 1) ? shifted : (shifted | ((ushort)in >> 6)); } static inline __device__ uchar conv_16to8(ushort in) @@ -50,9 +59,23 @@ static inline __device__ uchar conv_10to8(ushort in) return in >> 8; } -static inline __device__ ushort conv_10to16(ushort in) +static inline __device__ uchar conv_10to8pl(ushort in) { - return in | (in >> 10); + return in >> 2; +} + +static inline __device__ ushort conv_10to16(ushort in, int color_range) +{ + ushort shifted = (in >> 10); + // AVCOL_RANGE_MPEG = 1 (limited range) + return (color_range == 1) ? in : (in | shifted); +} + +static inline __device__ ushort conv_10to16pl(ushort in, int color_range) +{ + ushort shifted = (in << 6); + // AVCOL_RANGE_MPEG = 1 (limited range) + return (color_range == 1) ? shifted : (shifted | (in >> 4)); } static inline __device__ ushort conv_16to10(ushort in) @@ -60,12 +83,18 @@ static inline __device__ ushort conv_16to10(ushort in) return in & mask_10bit; } +static inline __device__ ushort conv_16to10pl(ushort in) +{ + return in >> 6; +} + #define DEF_F(N, T) \ template<subsample_function_t<in_T> subsample_func_y, \ subsample_function_t<in_T_uv> subsample_func_uv> \ __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \ int dst_width, int dst_height, int dst_pitch, \ - int src_left, int src_top, int src_width, int src_height, float param) + int src_left, int src_top, int src_width, int src_height, \ + float param, int color_range) #define SUB_F(m, plane) \ subsample_func_##m(src_tex[plane], xo, yo, \ @@ -81,9 +110,9 @@ static inline __device__ ushort conv_16to10(ushort in) #define DEFAULT_DST(n) \ dst[n][yo*FIXED_PITCH+xo] -// yuv420p->X +// planar8->X -struct Convert_yuv420p_yuv420p +struct Convert_planar8_planar8 { static const int in_bit_depth = 8; typedef uchar in_T; @@ -103,7 +132,47 @@ struct Convert_yuv420p_yuv420p } }; -struct Convert_yuv420p_nv12 +struct Convert_planar8_planar10 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1), color_range); + DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2), color_range); + } +}; + +struct Convert_planar8_planar16 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit, color_range); + DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit, color_range); + } +}; + +struct Convert_planar8_semiplanar8 { static const int in_bit_depth = 8; typedef uchar in_T; @@ -125,14 +194,82 @@ struct Convert_yuv420p_nv12 } }; -struct Convert_yuv420p_yuv444p +struct Convert_planar8_semiplanar10 { static const int in_bit_depth = 8; typedef uchar in_T; typedef uchar in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( + conv_8to16(SUB_F(uv, 1), mask_10bit, color_range), + conv_8to16(SUB_F(uv, 2), mask_10bit, color_range) + ); + } +}; + +struct Convert_planar8_semiplanar16 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( + conv_8to16(SUB_F(uv, 1), mask_16bit, color_range), + conv_8to16(SUB_F(uv, 2), mask_16bit, color_range) + ); + } +}; + + + +// planar10->X + +struct Convert_planar10_planar8 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2)); + } +}; + +struct Convert_planar10_planar10 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + DEF_F(Convert, out_T) { DEFAULT_DST(0) = SUB_F(y, 0); @@ -145,249 +282,227 @@ struct Convert_yuv420p_yuv444p } }; -struct Convert_yuv420p_p010le +struct Convert_planar10_planar16 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_10bit), - conv_8to16(SUB_F(uv, 2), mask_10bit) - ); - } -}; - -struct Convert_yuv420p_p016le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_16bit), - conv_8to16(SUB_F(uv, 2), mask_16bit) - ); - } -}; - -struct Convert_yuv420p_yuv444p16le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef ushort out_T; typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); - DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); + DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1), color_range); + DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2), color_range); } }; -// nv12->X - -struct Convert_nv12_yuv420p +struct Convert_planar10_semiplanar8 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; - } -}; - -struct Convert_nv12_nv12 -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef uchar out_T; typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = SUB_F(uv, 1); - } -}; - -struct Convert_nv12_yuv444p -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; - } -}; - -struct Convert_nv12_p010le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_ushort2( - conv_8to16(res.x, mask_10bit), - conv_8to16(res.y, mask_10bit) - ); - } -}; - -struct Convert_nv12_p016le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_ushort2( - conv_8to16(res.x, mask_16bit), - conv_8to16(res.y, mask_16bit) - ); - } -}; - -struct Convert_nv12_yuv444p16le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef ushort out_T; - typedef ushort out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit); - DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit); - } -}; - -// yuv444p->X - -struct Convert_yuv444p_yuv420p -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); - } -}; - -struct Convert_yuv444p_nv12 -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { DEFAULT_DST(1) = make_uchar2( + conv_10to8pl(SUB_F(uv, 1)), + conv_10to8pl(SUB_F(uv, 2)) + ); + } +}; + +struct Convert_planar10_semiplanar10 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = (SUB_F(y, 0) << 6); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( + (SUB_F(uv, 1) << 6), + (SUB_F(uv, 2) << 6) + ); + } +}; + +struct Convert_planar10_semiplanar16 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( + conv_10to16pl(SUB_F(uv, 1), color_range), + conv_10to16pl(SUB_F(uv, 2), color_range) + ); + } +}; + +// planar16->X + +struct Convert_planar16_planar8 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef uchar out_T; + typedef uchar out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); + } +}; + +struct Convert_planar16_planar10 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2)); + } +}; + +struct Convert_planar16_planar16 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = SUB_F(y, 0); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = SUB_F(uv, 1); + DEFAULT_DST(2) = SUB_F(uv, 2); + } +}; + +struct Convert_planar16_semiplanar8 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_uchar2( + conv_16to8(SUB_F(uv, 1)), + conv_16to8(SUB_F(uv, 2)) + ); + } +}; + +struct Convert_planar16_semiplanar10 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( + conv_16to10(SUB_F(uv, 1)), + conv_16to10(SUB_F(uv, 2)) + ); + } +}; + +struct Convert_planar16_semiplanar16 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = SUB_F(y, 0); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = make_ushort2( SUB_F(uv, 1), SUB_F(uv, 2) ); } }; -struct Convert_yuv444p_yuv444p +// semiplanar8->X + +struct Convert_semiplanar8_planar8 { static const int in_bit_depth = 8; typedef uchar in_T; - typedef uchar in_T_uv; + typedef uchar2 in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; @@ -398,78 +513,122 @@ struct Convert_yuv444p_yuv444p DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = res.x; + DEFAULT_DST(2) = res.y; } }; -struct Convert_yuv444p_p010le +struct Convert_semiplanar8_planar10 { static const int in_bit_depth = 8; typedef uchar in_T; - typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_10bit), - conv_8to16(SUB_F(uv, 2), mask_10bit) - ); - } -}; - -struct Convert_yuv444p_p016le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_16bit), - conv_8to16(SUB_F(uv, 2), mask_16bit) - ); - } -}; - -struct Convert_yuv444p_yuv444p16le -{ - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; + typedef uchar2 in_T_uv; typedef ushort out_T; typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); - DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_8to10pl(res.x, color_range); + DEFAULT_DST(2) = conv_8to10pl(res.y, color_range); } }; -// p010le->X +struct Convert_semiplanar8_planar16 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; -struct Convert_p010le_yuv420p + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit, color_range); + DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit, color_range); + } +}; + +struct Convert_semiplanar8_semiplanar8 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = SUB_F(y, 0); + } + + DEF_F(Convert_uv, out_T_uv) + { + DEFAULT_DST(1) = SUB_F(uv, 1); + } +}; + +struct Convert_semiplanar8_semiplanar10 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = make_ushort2( + conv_8to16(res.x, mask_10bit, color_range), + conv_8to16(res.y, mask_10bit, color_range) + ); + } +}; + +struct Convert_semiplanar8_semiplanar16 +{ + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef ushort out_T; + typedef ushort2 out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = make_ushort2( + conv_8to16(res.x, mask_16bit, color_range), + conv_8to16(res.y, mask_16bit, color_range) + ); + } +}; + +// semiplanar10->X + +struct Convert_semiplanar10_planar8 { static const int in_bit_depth = 10; typedef ushort in_T; @@ -490,7 +649,49 @@ struct Convert_p010le_yuv420p } }; -struct Convert_p010le_nv12 +struct Convert_semiplanar10_planar10 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = SUB_F(y, 0) >> 6; + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = res.x >> 6; + DEFAULT_DST(2) = res.y >> 6; + } +}; + +struct Convert_semiplanar10_planar16 +{ + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_10to16(res.x, color_range); + DEFAULT_DST(2) = conv_10to16(res.y, color_range); + } +}; + +struct Convert_semiplanar10_semiplanar8 { static const int in_bit_depth = 10; typedef ushort in_T; @@ -513,28 +714,7 @@ struct Convert_p010le_nv12 } }; -struct Convert_p010le_yuv444p -{ - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_10to8(res.x); - DEFAULT_DST(2) = conv_10to8(res.y); - } -}; - -struct Convert_p010le_p010le +struct Convert_semiplanar10_semiplanar10 { static const int in_bit_depth = 10; typedef ushort in_T; @@ -553,7 +733,7 @@ struct Convert_p010le_p010le } }; -struct Convert_p010le_p016le +struct Convert_semiplanar10_semiplanar16 { static const int in_bit_depth = 10; typedef ushort in_T; @@ -563,43 +743,23 @@ struct Convert_p010le_p016le DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); DEFAULT_DST(1) = make_ushort2( - conv_10to16(res.x), - conv_10to16(res.y) + conv_10to16(res.x, color_range), + conv_10to16(res.y, color_range) ); } }; -struct Convert_p010le_yuv444p16le -{ - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef ushort out_T; - typedef ushort out_T_uv; - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); - } +// semiplanar16->X - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_10to16(res.x); - DEFAULT_DST(2) = conv_10to16(res.y); - } -}; - -// p016le->X - -struct Convert_p016le_yuv420p +struct Convert_semiplanar16_planar8 { static const int in_bit_depth = 16; typedef ushort in_T; @@ -620,7 +780,49 @@ struct Convert_p016le_yuv420p } }; -struct Convert_p016le_nv12 +struct Convert_semiplanar16_planar10 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0)); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_16to10pl(res.x); + DEFAULT_DST(2) = conv_16to10pl(res.y); + } +}; + +struct Convert_semiplanar16_planar16 +{ + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; + + DEF_F(Convert, out_T) + { + DEFAULT_DST(0) = SUB_F(y, 0); + } + + DEF_F(Convert_uv, out_T_uv) + { + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = res.x; + DEFAULT_DST(2) = res.y; + } +}; + +struct Convert_semiplanar16_semiplanar8 { static const int in_bit_depth = 16; typedef ushort in_T; @@ -643,28 +845,7 @@ struct Convert_p016le_nv12 } }; -struct Convert_p016le_yuv444p -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_16to8(res.x); - DEFAULT_DST(2) = conv_16to8(res.y); - } -}; - -struct Convert_p016le_p010le +struct Convert_semiplanar16_semiplanar10 { static const int in_bit_depth = 16; typedef ushort in_T; @@ -687,7 +868,7 @@ struct Convert_p016le_p010le } }; -struct Convert_p016le_p016le +struct Convert_semiplanar16_semiplanar16 { static const int in_bit_depth = 16; typedef ushort in_T; @@ -706,155 +887,6 @@ struct Convert_p016le_p016le } }; -struct Convert_p016le_yuv444p16le -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef ushort out_T; - typedef ushort out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; - } -}; - -// yuv444p16le->X - -struct Convert_yuv444p16le_yuv420p -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); - DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); - } -}; - -struct Convert_yuv444p16le_nv12 -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_uchar2( - conv_16to8(SUB_F(uv, 1)), - conv_16to8(SUB_F(uv, 2)) - ); - } -}; - -struct Convert_yuv444p16le_yuv444p -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); - DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); - } -}; - -struct Convert_yuv444p16le_p010le -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - conv_16to10(SUB_F(uv, 1)), - conv_16to10(SUB_F(uv, 2)) - ); - } -}; - -struct Convert_yuv444p16le_p016le -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = make_ushort2( - SUB_F(uv, 1), - SUB_F(uv, 2) - ); - } -}; - -struct Convert_yuv444p16le_yuv444p16le -{ - static const int in_bit_depth = 16; - typedef ushort in_T; - typedef ushort in_T_uv; - typedef ushort out_T; - typedef ushort out_T_uv; - - DEF_F(Convert, out_T) - { - DEFAULT_DST(0) = SUB_F(y, 0); - } - - DEF_F(Convert_uv, out_T_uv) - { - DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); - } -}; - #define DEF_CONVERT_IDENTITY(fmt1, fmt2)\ \ struct Convert_##fmt1##_##fmt2 \ @@ -930,7 +962,7 @@ struct Convert_bgr0_bgra res.x, res.y, res.z, - 1 + 0xFF ); } @@ -954,7 +986,7 @@ struct Convert_bgr0_rgba res.z, res.y, res.x, - 1 + 0xFF ); } @@ -978,7 +1010,7 @@ struct Convert_rgb0_bgra res.z, res.y, res.x, - 1 + 0xFF ); } @@ -1002,7 +1034,7 @@ struct Convert_rgb0_rgba res.x, res.y, res.z, - 1 + 0xFF ); } @@ -1147,25 +1179,26 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, /// --- FUNCTION EXPORTS --- -#define KERNEL_ARGS(T) \ - cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \ - cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \ - T *dst_0, T *dst_1, T *dst_2, T *dst_3, \ - int dst_width, int dst_height, int dst_pitch, \ - int src_left, int src_top, int src_width, int src_height, float param +#define KERNEL_ARGS(T) CUDAScaleKernelParams params #define SUBSAMPLE(Convert, T) \ - cudaTextureObject_t src_tex[4] = \ - { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \ - T *dst[4] = { dst_0, dst_1, dst_2, dst_3 }; \ + cudaTextureObject_t src_tex[4] = { \ + params.src_tex[0], params.src_tex[1], \ + params.src_tex[2], params.src_tex[3] \ + }; \ + T *dst[4] = { \ + (T*)params.dst[0], (T*)params.dst[1], \ + (T*)params.dst[2], (T*)params.dst[3] \ + }; \ int xo = blockIdx.x * blockDim.x + threadIdx.x; \ int yo = blockIdx.y * blockDim.y + threadIdx.y; \ - if (yo >= dst_height || xo >= dst_width) return; \ + if (yo >= params.dst_height || xo >= params.dst_width) return; \ Convert( \ src_tex, dst, xo, yo, \ - dst_width, dst_height, dst_pitch, \ - src_left, src_top, \ - src_width, src_height, param); + params.dst_width, params.dst_height, params.dst_pitch, \ + params.src_left, params.src_top, \ + params.src_width, params.src_height, \ + params.param, params.color_range); extern "C" { @@ -1184,12 +1217,12 @@ extern "C" { NEAREST_KERNEL(C,_uv) #define NEAREST_KERNELS(C) \ - NEAREST_KERNEL_RAW(yuv420p_ ## C) \ - NEAREST_KERNEL_RAW(nv12_ ## C) \ - NEAREST_KERNEL_RAW(yuv444p_ ## C) \ - NEAREST_KERNEL_RAW(p010le_ ## C) \ - NEAREST_KERNEL_RAW(p016le_ ## C) \ - NEAREST_KERNEL_RAW(yuv444p16le_ ## C) + NEAREST_KERNEL_RAW(planar8_ ## C) \ + NEAREST_KERNEL_RAW(planar10_ ## C) \ + NEAREST_KERNEL_RAW(planar16_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar8_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar10_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar16_ ## C) #define NEAREST_KERNELS_RGB(C) \ NEAREST_KERNEL_RAW(rgb0_ ## C) \ @@ -1197,12 +1230,12 @@ extern "C" { NEAREST_KERNEL_RAW(rgba_ ## C) \ NEAREST_KERNEL_RAW(bgra_ ## C) \ -NEAREST_KERNELS(yuv420p) -NEAREST_KERNELS(nv12) -NEAREST_KERNELS(yuv444p) -NEAREST_KERNELS(p010le) -NEAREST_KERNELS(p016le) -NEAREST_KERNELS(yuv444p16le) +NEAREST_KERNELS(planar8) +NEAREST_KERNELS(planar10) +NEAREST_KERNELS(planar16) +NEAREST_KERNELS(semiplanar8) +NEAREST_KERNELS(semiplanar10) +NEAREST_KERNELS(semiplanar16) NEAREST_KERNELS_RGB(rgb0) NEAREST_KERNELS_RGB(bgr0) @@ -1224,12 +1257,12 @@ NEAREST_KERNELS_RGB(bgra) BILINEAR_KERNEL(C,_uv) #define BILINEAR_KERNELS(C) \ - BILINEAR_KERNEL_RAW(yuv420p_ ## C) \ - BILINEAR_KERNEL_RAW(nv12_ ## C) \ - BILINEAR_KERNEL_RAW(yuv444p_ ## C) \ - BILINEAR_KERNEL_RAW(p010le_ ## C) \ - BILINEAR_KERNEL_RAW(p016le_ ## C) \ - BILINEAR_KERNEL_RAW(yuv444p16le_ ## C) + BILINEAR_KERNEL_RAW(planar8_ ## C) \ + BILINEAR_KERNEL_RAW(planar10_ ## C) \ + BILINEAR_KERNEL_RAW(planar16_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar8_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar16_ ## C) #define BILINEAR_KERNELS_RGB(C) \ BILINEAR_KERNEL_RAW(rgb0_ ## C) \ @@ -1237,12 +1270,12 @@ NEAREST_KERNELS_RGB(bgra) BILINEAR_KERNEL_RAW(rgba_ ## C) \ BILINEAR_KERNEL_RAW(bgra_ ## C) -BILINEAR_KERNELS(yuv420p) -BILINEAR_KERNELS(nv12) -BILINEAR_KERNELS(yuv444p) -BILINEAR_KERNELS(p010le) -BILINEAR_KERNELS(p016le) -BILINEAR_KERNELS(yuv444p16le) +BILINEAR_KERNELS(planar8) +BILINEAR_KERNELS(planar10) +BILINEAR_KERNELS(planar16) +BILINEAR_KERNELS(semiplanar8) +BILINEAR_KERNELS(semiplanar10) +BILINEAR_KERNELS(semiplanar16) BILINEAR_KERNELS_RGB(rgb0) BILINEAR_KERNELS_RGB(bgr0) @@ -1264,12 +1297,12 @@ BILINEAR_KERNELS_RGB(bgra) BICUBIC_KERNEL(C,_uv) #define BICUBIC_KERNELS(C) \ - BICUBIC_KERNEL_RAW(yuv420p_ ## C) \ - BICUBIC_KERNEL_RAW(nv12_ ## C) \ - BICUBIC_KERNEL_RAW(yuv444p_ ## C) \ - BICUBIC_KERNEL_RAW(p010le_ ## C) \ - BICUBIC_KERNEL_RAW(p016le_ ## C) \ - BICUBIC_KERNEL_RAW(yuv444p16le_ ## C) + BICUBIC_KERNEL_RAW(planar8_ ## C) \ + BICUBIC_KERNEL_RAW(planar10_ ## C) \ + BICUBIC_KERNEL_RAW(planar16_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar8_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar16_ ## C) #define BICUBIC_KERNELS_RGB(C) \ BICUBIC_KERNEL_RAW(rgb0_ ## C) \ @@ -1277,12 +1310,12 @@ BILINEAR_KERNELS_RGB(bgra) BICUBIC_KERNEL_RAW(rgba_ ## C) \ BICUBIC_KERNEL_RAW(bgra_ ## C) -BICUBIC_KERNELS(yuv420p) -BICUBIC_KERNELS(nv12) -BICUBIC_KERNELS(yuv444p) -BICUBIC_KERNELS(p010le) -BICUBIC_KERNELS(p016le) -BICUBIC_KERNELS(yuv444p16le) +BICUBIC_KERNELS(planar8) +BICUBIC_KERNELS(planar10) +BICUBIC_KERNELS(planar16) +BICUBIC_KERNELS(semiplanar8) +BICUBIC_KERNELS(semiplanar10) +BICUBIC_KERNELS(semiplanar16) BICUBIC_KERNELS_RGB(rgb0) BICUBIC_KERNELS_RGB(bgr0) @@ -1304,12 +1337,12 @@ BICUBIC_KERNELS_RGB(bgra) LANCZOS_KERNEL(C,_uv) #define LANCZOS_KERNELS(C) \ - LANCZOS_KERNEL_RAW(yuv420p_ ## C) \ - LANCZOS_KERNEL_RAW(nv12_ ## C) \ - LANCZOS_KERNEL_RAW(yuv444p_ ## C) \ - LANCZOS_KERNEL_RAW(p010le_ ## C) \ - LANCZOS_KERNEL_RAW(p016le_ ## C) \ - LANCZOS_KERNEL_RAW(yuv444p16le_ ## C) + LANCZOS_KERNEL_RAW(planar8_ ## C) \ + LANCZOS_KERNEL_RAW(planar10_ ## C) \ + LANCZOS_KERNEL_RAW(planar16_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar8_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar16_ ## C) #define LANCZOS_KERNELS_RGB(C) \ LANCZOS_KERNEL_RAW(rgb0_ ## C) \ @@ -1317,12 +1350,12 @@ BICUBIC_KERNELS_RGB(bgra) LANCZOS_KERNEL_RAW(rgba_ ## C) \ LANCZOS_KERNEL_RAW(bgra_ ## C) -LANCZOS_KERNELS(yuv420p) -LANCZOS_KERNELS(nv12) -LANCZOS_KERNELS(yuv444p) -LANCZOS_KERNELS(p010le) -LANCZOS_KERNELS(p016le) -LANCZOS_KERNELS(yuv444p16le) +LANCZOS_KERNELS(planar8) +LANCZOS_KERNELS(planar10) +LANCZOS_KERNELS(planar16) +LANCZOS_KERNELS(semiplanar8) +LANCZOS_KERNELS(semiplanar10) +LANCZOS_KERNELS(semiplanar16) LANCZOS_KERNELS_RGB(rgb0) LANCZOS_KERNELS_RGB(bgr0) diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h index 40d5b9cfac..1fb3498ee8 100644 --- a/libavfilter/vf_scale_cuda.h +++ b/libavfilter/vf_scale_cuda.h @@ -23,6 +23,22 @@ #ifndef AVFILTER_SCALE_CUDA_H #define AVFILTER_SCALE_CUDA_H +#include <ffnvcodec/dynlink_cuda.h> + #define SCALE_CUDA_PARAM_DEFAULT 999999.0f +typedef struct __attribute__((aligned(16))) { + CUtexObject src_tex[4]; + CUdeviceptr dst[4]; + int dst_width; + int dst_height; + int dst_pitch; + int src_left; + int src_top; + int src_width; + int src_height; + float param; + int color_range; +} CUDAScaleKernelParams; + #endif -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
