[FFmpeg-cvslog] lavu/pixdesc: handle xv30be in av_[read|write]_image_line
ffmpeg | branch: master | Philip Langdale | Sun Dec 4 12:53:57 2022 -0800| [9651f873f8ce15c5d4380f49488900bbf6e6c731] | committer: Philip Langdale lavu/pixdesc: handle xv30be in av_[read|write]_image_line xv30be is an obnoxious format that I shouldn't have included in the first place. xv30 packs 3 10bit channels into 32bits and while our byte-oriented logic can handle Little Endian correctly, it cannot handle Big Endian. To avoid that, I marked xv30be as a bitstream format, but while that didn't produce FATE errors, it turns out that the existing read/write code silently produces incorrect results, which can be revealed via ubsan. In all likelihood, the correct fix here is to remove the format. As this format is only used by Intel vaapi, it's only going to show up in LE form, so we could just drop the BE version. But I don't want to deal with creating a hole in the pixfmt list and all the weirdness that comes from that. Instead, I decided to write the correct read/write code for it. And that code isn't too bad, as long as it's specialised for this format, as the channels are all bit-aligned inside a 32bit word. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9651f873f8ce15c5d4380f49488900bbf6e6c731 --- libavutil/pixdesc.c | 70 ++--- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index ca3e204a0b..62a2ae08d9 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -46,19 +46,35 @@ void av_read_image_line2(void *dst, uint32_t *dst32 = dst; if (flags & AV_PIX_FMT_FLAG_BITSTREAM) { -int skip = x * step + comp.offset; -const uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3); -int shift = 8 - depth - (skip & 7); +if (depth == 10) { +// Assume all channels are packed into a 32bit value +const uint8_t *byte_p = data[plane] + y * linesize[plane]; +const uint32_t *p = (uint32_t *)byte_p; -while (w--) { -int val = (*p >> shift) & mask; -if (read_pal_component) -val = data[1][4*val + c]; -shift -= step; -p -= shift >> 3; -shift &= 7; -if (dst_element_size == 4) *dst32++ = val; -else *dst16++ = val; +while (w--) { +int val = AV_RB32(p); +val = (val >> comp.offset) & mask; +if (read_pal_component) +val = data[1][4*val + c]; +if (dst_element_size == 4) *dst32++ = val; +else *dst16++ = val; +p++; +} +} else { +int skip = x * step + comp.offset; +const uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3); +int shift = 8 - depth - (skip & 7); + +while (w--) { +int val = (*p >> shift) & mask; +if (read_pal_component) +val = data[1][4*val + c]; +shift -= step; +p -= shift >> 3; +shift &= 7; +if (dst_element_size == 4) *dst32++ = val; +else *dst16++ = val; +} } } else { const uint8_t *p = data[plane] + y * linesize[plane] + @@ -109,15 +125,29 @@ void av_write_image_line2(const void *src, const uint16_t *src16 = src; if (flags & AV_PIX_FMT_FLAG_BITSTREAM) { -int skip = x * step + comp.offset; -uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3); -int shift = 8 - depth - (skip & 7); +if (depth == 10) { +// Assume all channels are packed into a 32bit value +const uint8_t *byte_p = data[plane] + 
y * linesize[plane]; +uint32_t *p = (uint32_t *)byte_p; +int offset = comp.offset; +uint32_t mask = ((1ULL << depth) - 1) << offset; -while (w--) { -*p |= (src_element_size == 4 ? *src32++ : *src16++) << shift; -shift -= step; -p -= shift >> 3; -shift &= 7; +while (w--) { +uint16_t val = src_element_size == 4 ? *src32++ : *src16++; +AV_WB32(p, (AV_RB32(p) & ~mask) | (val << offset)); +p++; +} +} else { +int skip = x * step + comp.offset; +uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3); +int shift = 8 - depth - (skip & 7); + +while (w--) { +*p |= (src_element_size == 4 ? *src32++ : *src16++) << shift; +shift -= step; +p -= shift >> 3; +shift &= 7; +
[FFmpeg-cvslog] lavu/pixdesc: favour formats where depth and subsampling exactly match
ffmpeg | branch: master | Philip Langdale | Wed Sep 7 21:03:15 2022 -0700| [ed83a3a5bddf4c209157dc9f041eda0721b4c3e0] | committer: Philip Langdale lavu/pixdesc: favour formats where depth and subsampling exactly match Since introducing the various packed formats used by VAAPI (and p012), we've noticed that there's actually a gap in how av_find_best_pix_fmt_of_2 works. It doesn't actually assign any value to having the same bit depth as the source format, when comparing against formats with a higher bit depth. This usually doesn't matter, because av_get_padded_bits_per_pixel() will account for it. However, as many of these formats use padding internally, we find that av_get_padded_bits_per_pixel() actually returns the same value for the 10 bit, 12 bit, 16 bit flavours, etc. In these tied situations, we end up just picking the first of the two provided formats, even if the second one should be preferred because it matches the actual bit depth. This bug already existed if you tried to compare yuv420p10 against p016 and p010, for example, but it simply hadn't come up before so we never noticed. But now, we actually got a situation in the VAAPI VP9 decoder where it offers both p010 and p012 because Profile 3 could be either depth and ends up picking p012 for 10 bit content due to the ordering of the testing. In addition, in the process of testing the fix, I realised we have the same gap when it comes to chroma subsampling - we do not favour a format that has exactly the same subsampling vs one with less subsampling when all else is equal. To fix this, I'm introducing a small score penalty if the bit depth or subsampling doesn't exactly match the source format. This will break the tie in favour of the format with the exact match, but not offset any of the other scoring penalties we already have. I have added a set of tests around these formats which will fail without this fix. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed83a3a5bddf4c209157dc9f041eda0721b4c3e0 --- libavutil/pixdesc.c | 31 +- libavutil/pixdesc.h | 15 +++-- libavutil/tests/pixfmt_best.c | 129 +++--- libavutil/version.h | 2 +- tests/ref/fate/pixfmt_best| 2 +- 5 files changed, 151 insertions(+), 28 deletions(-) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index d7c6ebfdc4..b472a94f60 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -3013,9 +3013,16 @@ static int get_pix_fmt_score(enum AVPixelFormat dst_pix_fmt, for (i = 0; i < nb_components; i++) { int depth_minus1 = (dst_pix_fmt == AV_PIX_FMT_PAL8) ? 7/nb_components : (dst_desc->comp[i].depth - 1); -if (src_desc->comp[i].depth - 1 > depth_minus1 && (consider & FF_LOSS_DEPTH)) { +int depth_delta = src_desc->comp[i].depth - 1 - depth_minus1; +if (depth_delta > 0 && (consider & FF_LOSS_DEPTH)) { loss |= FF_LOSS_DEPTH; score -= 65536 >> depth_minus1; +} else if (depth_delta < 0 && (consider & FF_LOSS_EXCESS_DEPTH)) { +// Favour formats where bit depth exactly matches. If all other +// scoring is equal, we'd rather use the bit depth that most closely +// matches the source. +loss |= FF_LOSS_EXCESS_DEPTH; +score += depth_delta; } } @@ -3035,6 +3042,28 @@ static int get_pix_fmt_score(enum AVPixelFormat dst_pix_fmt, } } +if (consider & FF_LOSS_EXCESS_RESOLUTION) { +// Favour formats where chroma subsampling exactly matches. If all other +// scoring is equal, we'd rather use the subsampling that most closely +// matches the source. +if (dst_desc->log2_chroma_w < src_desc->log2_chroma_w) { +loss |= FF_LOSS_EXCESS_RESOLUTION; +score -= 1 << (src_desc->log2_chroma_w - dst_desc->log2_chroma_w); +} + +if (dst_desc->log2_chroma_h < src_desc->log2_chroma_h) { +loss |= FF_LOSS_EXCESS_RESOLUTION; +score -= 1 << (src_desc->log2_chroma_h - dst_desc->log2_chroma_h); +} + +// don't favour 411 over 420, because 420 has much better support on the +// decoder side. 
+if (dst_desc->log2_chroma_w == 1 && src_desc->log2_chroma_w == 2 && +dst_desc->log2_chroma_h == 1 && src_desc->log2_chroma_h == 2) { +score += 4; +} +} + if(consider & FF_LOSS_COLORSPACE) switch(dst_color) { case FF_COLOR_RGB: diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h index f8a195ffcd..48d9300bfe 100644 --- a/libavutil/pixdesc.h +++ b/libavutil/pixdesc.h @@ -357,12 +357,15 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], */ enum AVPixelFormat av_pix_fmt_swap_endianness(en
[FFmpeg-cvslog] swscale/output: add support for Y210LE and Y212LE
ffmpeg | branch: master | Philip Langdale | Mon Sep 5 21:47:29 2022 -0700| [09a8e5debb284984871bd3eabd139b7207eedcdc] | committer: Philip Langdale swscale/output: add support for Y210LE and Y212LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=09a8e5debb284984871bd3eabd139b7207eedcdc --- libswscale/output.c | 48 libswscale/utils.c | 4 +-- libswscale/version.h | 2 +- tests/ref/fate/filter-pixdesc-y210le | 1 + tests/ref/fate/filter-pixdesc-y212le | 1 + tests/ref/fate/filter-pixfmts-copy | 2 ++ tests/ref/fate/filter-pixfmts-field | 2 ++ tests/ref/fate/filter-pixfmts-fieldorder | 2 ++ tests/ref/fate/filter-pixfmts-il | 2 ++ tests/ref/fate/filter-pixfmts-null | 2 ++ tests/ref/fate/filter-pixfmts-scale | 2 ++ tests/ref/fate/filter-pixfmts-vflip | 2 ++ 12 files changed, 67 insertions(+), 3 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 39e2a04609..2f599698e9 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2732,6 +2732,48 @@ yuv2vuyx_X_c(SwsContext *c, const int16_t *lumFilter, chrUSrc, chrVSrc, chrFilterSize, alpSrc, dest, dstW, y, 0); } +#define output_pixel(pos, val, bits) \ +AV_WL16(pos, av_clip_uintp2(val >> shift, bits) << output_shift); + +#define yuv2y2xx_wrapper(bits) \ +static void \ +yuv2y2 ## bits ## le_X_c(SwsContext *c, const int16_t *lumFilter, \ +const int16_t **lumSrc, int lumFilterSize, \ +const int16_t *chrFilter, \ +const int16_t **chrUSrc,\ +const int16_t **chrVSrc, int chrFilterSize, \ +const int16_t **alpSrc, \ +uint8_t *dest, int dstW, int y) \ +{ \ +int i, j; \ +int shift = 11 + 16 - bits; \ +int output_shift = 16 - bits; \ +for (i = 0; i < ((dstW + 1) >> 1); i++) { \ +int Y1 = 1 << (shift - 1), Y2 = 1 << (shift - 1); \ +int U = 1 << (shift - 1), V = 1 << (shift - 1); \ +\ +for (j = 0; j < lumFilterSize; j++) { \ +Y1 += lumSrc[j][i * 2] * lumFilter[j]; \ +Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; \ +} \ +\ +for (j = 0; j < chrFilterSize; j++) { \ +U += chrUSrc[j][i] * chrFilter[j]; \ +V += 
chrVSrc[j][i] * chrFilter[j]; \ +} \ +\ +output_pixel(dest + 8 * i + 0, Y1, bits); \ +output_pixel(dest + 8 * i + 2, U, bits); \ +output_pixel(dest + 8 * i + 4, Y2, bits); \ +output_pixel(dest + 8 * i + 6, V, bits); \ +} \ +} + +yuv2y2xx_wrapper(10) +yuv2y2xx_wrapper(12) + +#undef output_pixel + av_cold void ff_sws_init_output_funcs(SwsContext *c, yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX, @@ -3252,5 +3294,11 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_XV36LE: *yuv2packedX = yuv2xv36le_X_c; break; +case AV_PIX_FMT_Y210LE: +*yuv2packedX = yuv2y210le_X_c; +break; +case AV_PIX_FMT_Y212LE: +*yuv2packedX = yuv2y212le_X_c; +break; } } diff --git a/libswscale/utils.c b/libswscale/utils.c index ec67020cc9..14e2700733 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -248,8 +248,8 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_YUVA444P12LE] = { 1, 1 }, [AV_PIX_FMT_NV24]= { 1, 1 }, [AV_PIX_FMT_NV42]= { 1, 1 }, -[AV_PIX_FMT_Y210LE] = { 1, 0 }, -[AV_PIX_FMT_Y212LE] = { 1, 0 }, +[AV_PIX_FMT_Y210LE] = { 1, 1 }, +[AV_PIX_FMT_Y212LE] = { 1, 1 }, [AV_PIX_FMT_X2RGB10LE
[FFmpeg-cvslog] swscale/output: add support for XV30LE
ffmpeg | branch: master | Philip Langdale | Mon Sep 5 20:00:59 2022 -0700| [68181623e984b249402ac6fd0849c032b05ae143] | committer: Philip Langdale swscale/output: add support for XV30LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=68181623e984b249402ac6fd0849c032b05ae143 --- libswscale/output.c | 31 +++ libswscale/utils.c | 2 +- libswscale/version.h | 2 +- tests/ref/fate/filter-pixdesc-xv30le | 1 + tests/ref/fate/filter-pixfmts-copy | 1 + tests/ref/fate/filter-pixfmts-crop | 1 + tests/ref/fate/filter-pixfmts-field | 1 + tests/ref/fate/filter-pixfmts-fieldorder | 1 + tests/ref/fate/filter-pixfmts-hflip | 1 + tests/ref/fate/filter-pixfmts-il | 1 + tests/ref/fate/filter-pixfmts-null | 1 + tests/ref/fate/filter-pixfmts-scale | 1 + tests/ref/fate/filter-pixfmts-transpose | 1 + tests/ref/fate/filter-pixfmts-vflip | 1 + 14 files changed, 44 insertions(+), 2 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 228dab462e..39e2a04609 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2600,6 +2600,34 @@ yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2xv30le_X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +{ +int i; +for (i = 0; i < dstW; i++) { +int Y = 1 << 16, U = 1 << 16, V = 1 << 16; +int j; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * chrFilter[j]; +V += chrVSrc[j][i] * chrFilter[j]; +} + +Y = av_clip_uintp2(Y >> 17, 10); +U = av_clip_uintp2(U >> 17, 10); +V = av_clip_uintp2(V >> 17, 10); + +AV_WL32(dest + 4 * i, U | Y << 10 | V << 20); +} +} + static void yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -3218,6 +3246,9 @@ av_cold void 
ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_VUYX: *yuv2packedX = yuv2vuyx_X_c; break; +case AV_PIX_FMT_XV30LE: +*yuv2packedX = yuv2xv30le_X_c; +break; case AV_PIX_FMT_XV36LE: *yuv2packedX = yuv2xv36le_X_c; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index 9166e80002..ec67020cc9 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -265,7 +265,7 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX]= { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, -[AV_PIX_FMT_XV30LE] = { 1, 0 }, +[AV_PIX_FMT_XV30LE] = { 1, 1 }, [AV_PIX_FMT_XV36LE] = { 1, 1 }, }; diff --git a/libswscale/version.h b/libswscale/version.h index c35e51138d..e8f1dadb8b 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 110 +#define LIBSWSCALE_VERSION_MICRO 111 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ diff --git a/tests/ref/fate/filter-pixdesc-xv30le b/tests/ref/fate/filter-pixdesc-xv30le new file mode 100644 index 00..9b5ad5417e --- /dev/null +++ b/tests/ref/fate/filter-pixdesc-xv30le @@ -0,0 +1 @@ +pixdesc-xv30le fb76a14d6d5cf3a0b48f30b2fb59becd diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy index c88594f3aa..67383c43f8 100644 --- a/tests/ref/fate/filter-pixfmts-copy +++ b/tests/ref/fate/filter-pixfmts-copy @@ -95,6 +95,7 @@ vuya3d5e934651cae1ce334001cb1829ad22 vuyx3f68ea6ec492b30d867cb5401562264e x2bgr10le 550c0d190cf695afa4eaacb644db6b75 x2rgb10le c1e3ac21be04a16bb157b22784524520 +xv30le c14b5a953bf3be56346f66ca174a5b1b xv36le 3f8ced42a081639a39ec5929dd77b017 xyz12be a1ef56bf746d71f59669c28e48fc8450 xyz12le 831ff03c1ba4ef19374686f16a064d8c diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop index bdad0d02cd..bdb2536f7d 100644 --- a/tests/ref/fate/filter-pixfmts-crop +++ 
b/tests/ref/fate/filter-pixfmts-crop @@ -92,6 +92,7 @@ vuya76578a705ff3a37559653c1289bd03dd vuyx5d2bae51a2f4892bd5f177f190cc323b x2bgr10le 84de725b85662c362862820dc4a309aa x2rgb10le f4265aca7a67dbfa93543700
[FFmpeg-cvslog] swscale/output: add support for XV36LE
ffmpeg | branch: master | Philip Langdale | Mon Sep 5 13:41:00 2022 -0700| [366f073c624779af852bacbc9a0a416e27ff96f7] | committer: Philip Langdale swscale/output: add support for XV36LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=366f073c624779af852bacbc9a0a416e27ff96f7 --- libswscale/output.c | 29 + libswscale/utils.c | 2 +- libswscale/version.h | 2 +- tests/ref/fate/filter-pixdesc-xv36le | 1 + tests/ref/fate/filter-pixfmts-copy | 1 + tests/ref/fate/filter-pixfmts-crop | 1 + tests/ref/fate/filter-pixfmts-field | 1 + tests/ref/fate/filter-pixfmts-fieldorder | 1 + tests/ref/fate/filter-pixfmts-hflip | 1 + tests/ref/fate/filter-pixfmts-il | 1 + tests/ref/fate/filter-pixfmts-null | 1 + tests/ref/fate/filter-pixfmts-scale | 1 + tests/ref/fate/filter-pixfmts-transpose | 1 + tests/ref/fate/filter-pixfmts-vflip | 1 + 14 files changed, 42 insertions(+), 2 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index da6c026916..228dab462e 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2600,6 +2600,32 @@ yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +{ +int i; +for (i = 0; i < dstW; i++) { +int Y = 1 << 14, U = 1 << 14, V = 1 << 14; +int j; + +for (j = 0; j < lumFilterSize; j++) +Y += lumSrc[j][i] * lumFilter[j]; + +for (j = 0; j < chrFilterSize; j++) { +U += chrUSrc[j][i] * chrFilter[j]; +V += chrVSrc[j][i] * chrFilter[j]; +} + +AV_WL16(dest + 8 * i + 2, av_clip_uintp2(Y >> 15, 12) << 4); +AV_WL16(dest + 8 * i + 0, av_clip_uintp2(U >> 15, 12) << 4); +AV_WL16(dest + 8 * i + 4, av_clip_uintp2(V >> 15, 12) << 4); +} +} + static void yuv2vuyX_X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -3192,5 +3218,8 @@ 
av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_VUYX: *yuv2packedX = yuv2vuyx_X_c; break; +case AV_PIX_FMT_XV36LE: +*yuv2packedX = yuv2xv36le_X_c; +break; } } diff --git a/libswscale/utils.c b/libswscale/utils.c index 599c326754..9166e80002 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -266,7 +266,7 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, [AV_PIX_FMT_XV30LE] = { 1, 0 }, -[AV_PIX_FMT_XV36LE] = { 1, 0 }, +[AV_PIX_FMT_XV36LE] = { 1, 1 }, }; int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, diff --git a/libswscale/version.h b/libswscale/version.h index 284c13cc23..c35e51138d 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 109 +#define LIBSWSCALE_VERSION_MICRO 110 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ diff --git a/tests/ref/fate/filter-pixdesc-xv36le b/tests/ref/fate/filter-pixdesc-xv36le new file mode 100644 index 00..8ba8099423 --- /dev/null +++ b/tests/ref/fate/filter-pixdesc-xv36le @@ -0,0 +1 @@ +pixdesc-xv36le 9d00bb58092f8b6d5d6fd71a8aec719a diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy index d92dd169dc..c88594f3aa 100644 --- a/tests/ref/fate/filter-pixfmts-copy +++ b/tests/ref/fate/filter-pixfmts-copy @@ -95,6 +95,7 @@ vuya3d5e934651cae1ce334001cb1829ad22 vuyx3f68ea6ec492b30d867cb5401562264e x2bgr10le 550c0d190cf695afa4eaacb644db6b75 x2rgb10le c1e3ac21be04a16bb157b22784524520 +xv36le 3f8ced42a081639a39ec5929dd77b017 xyz12be a1ef56bf746d71f59669c28e48fc8450 xyz12le 831ff03c1ba4ef19374686f16a064d8c ya16be 37c07787e544f900c87b853253bfc8dd diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop index f7103a5906..bdad0d02cd 100644 --- a/tests/ref/fate/filter-pixfmts-crop +++ 
b/tests/ref/fate/filter-pixfmts-crop @@ -92,6 +92,7 @@ vuya76578a705ff3a37559653c1289bd03dd vuyx5d2bae51a2f4892bd5f177f190cc323b x2bgr10le 84de725b85662c362862820dc4a309aa x2rgb10le f4265aca7a67dbfa935
[FFmpeg-cvslog] swscale/output: add support for P012
ffmpeg | branch: master | Philip Langdale | Mon Sep 5 14:53:50 2022 -0700| [caf8d4d256cc21f09570bdcbdbe8dde4406834ca] | committer: Philip Langdale swscale/output: add support for P012 This generalises the existing P010 support. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf8d4d256cc21f09570bdcbdbe8dde4406834ca --- libswscale/output.c | 140 ++-- libswscale/utils.c | 4 +- libswscale/version.h| 2 +- tests/ref/fate/filter-pixdesc-p012be| 1 + tests/ref/fate/filter-pixdesc-p012le| 1 + tests/ref/fate/filter-pixfmts-copy | 2 + tests/ref/fate/filter-pixfmts-crop | 2 + tests/ref/fate/filter-pixfmts-field | 2 + tests/ref/fate/filter-pixfmts-hflip | 2 + tests/ref/fate/filter-pixfmts-il| 2 + tests/ref/fate/filter-pixfmts-null | 2 + tests/ref/fate/filter-pixfmts-pad | 1 + tests/ref/fate/filter-pixfmts-scale | 2 + tests/ref/fate/filter-pixfmts-transpose | 2 + tests/ref/fate/filter-pixfmts-vflip | 2 + 15 files changed, 105 insertions(+), 62 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 40a4476c6d..da6c026916 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -460,17 +460,18 @@ static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, #define output_pixel(pos, val) \ if (big_endian) { \ -AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \ +AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits) << output_shift); \ } else { \ -AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \ +AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits) << output_shift); \ } -static void yuv2p010l1_c(const int16_t *src, +static void yuv2p01xl1_c(const int16_t *src, uint16_t *dest, int dstW, - int big_endian) + int big_endian, int output_bits) { int i; -int shift = 5; +int shift = 15 - output_bits; +int output_shift = 16 - output_bits; for (i = 0; i < dstW; i++) { int val = src[i] + (1 << (shift - 1)); @@ -478,12 +479,13 @@ static void yuv2p010l1_c(const int16_t *src, } } -static void yuv2p010lX_c(const int16_t *filter, 
int filterSize, +static void yuv2p01xlX_c(const int16_t *filter, int filterSize, const int16_t **src, uint16_t *dest, int dstW, - int big_endian) + int big_endian, int output_bits) { int i, j; -int shift = 17; +int shift = 11 + 16 - output_bits; +int output_shift = 16 - output_bits; for (i = 0; i < dstW; i++) { int val = 1 << (shift - 1); @@ -495,14 +497,15 @@ static void yuv2p010lX_c(const int16_t *filter, int filterSize, } } -static void yuv2p010cX_c(int big_endian, const uint8_t *chrDither, +static void yuv2p01xcX_c(int big_endian, const uint8_t *chrDither, const int16_t *chrFilter, int chrFilterSize, const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest8, int chrDstW) + uint8_t *dest8, int chrDstW, int output_bits) { uint16_t *dest = (uint16_t*)dest8; -int shift = 17; int i, j; +int shift = 11 + 16 - output_bits; +int output_shift = 16 - output_bits; for (i = 0; i < chrDstW; i++) { int u = 1 << (shift - 1); @@ -518,52 +521,65 @@ static void yuv2p010cX_c(int big_endian, const uint8_t *chrDither, } } -static void yuv2p010l1_LE_c(const int16_t *src, -uint8_t *dest, int dstW, -const uint8_t *dither, int offset) -{ -yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0); -} - -static void yuv2p010l1_BE_c(const int16_t *src, -uint8_t *dest, int dstW, -const uint8_t *dither, int offset) -{ -yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1); -} - -static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize, -const int16_t **src, uint8_t *dest, int dstW, -const uint8_t *dither, int offset) -{ -yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0); -} - -static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize, -const int16_t **src, uint8_t *dest, int dstW, -const uint8_t *dither, int offset) -{ -yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1); -} - -static void yuv2p010cX_LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, -const int16_t *chrFilter, int chrFilterSize, -const int16_t **chrUSrc, const int16_t **chrVSrc, -
[FFmpeg-cvslog] swscale/input: add support for Y212LE
ffmpeg | branch: master | Philip Langdale | Sun Sep 4 16:42:32 2022 -0700| [4a59eba227135f90a59a412a0175c783dc0be6d5] | committer: Philip Langdale swscale/input: add support for Y212LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4a59eba227135f90a59a412a0175c783dc0be6d5 --- libswscale/input.c | 45 ++--- libswscale/utils.c | 1 + libswscale/version.h | 2 +- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index f4f08c8f72..be8f3940e1 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -559,23 +559,32 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con av_assert1(src1 == src2); } -static void y210le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, -const uint8_t *unused1, int width, uint32_t *unused2, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dstU + i * 2, AV_RL16(src + i * 8 + 2) >> 6); -AV_WN16(dstV + i * 2, AV_RL16(src + i * 8 + 6) >> 6); +#define y21xle_wrapper(bits, shift) \ +static void y2 ## bits ## le_UV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused0,\ + const uint8_t *src,\ + const uint8_t *unused1, int width, \ + uint32_t *unused2, void *opq) \ +{\ +int i; \ +for (i = 0; i < width; i++) {\ +AV_WN16(dstU + i * 2, AV_RL16(src + i * 8 + 2) >> shift);\ +AV_WN16(dstV + i * 2, AV_RL16(src + i * 8 + 6) >> shift);\ +}\ +}\ + \ +static void y2 ## bits ## le_Y_c(uint8_t *dst, const uint8_t *src, \ + const uint8_t *unused0, \ + const uint8_t *unused1, int width, \ + uint32_t *unused2, void *opq) \ +{\ +int i; \ +for (i = 0; i < width; i++) \ +AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> shift); \ } -} -static void y210le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, - const uint8_t *unused1, int width, uint32_t *unused2, void *opq) -{ -int i; -for (i = 0; i < width; i++) -AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> 6); -} +y21xle_wrapper(10, 6); +y21xle_wrapper(12, 4); static void 
bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused, void *opq) @@ -1447,6 +1456,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_Y210LE: c->chrToYV12 = y210le_UV_c; break; +case AV_PIX_FMT_Y212LE: +c->chrToYV12 = y212le_UV_c; +break; } if (c->chrSrcHSubSample) { switch (srcFormat) { @@ -1932,6 +1944,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_Y210LE: c->lumToYV12 = y210le_Y_c; break; +case AV_PIX_FMT_Y212LE: +c->lumToYV12 = y212le_Y_c; +break; case AV_PIX_FMT_X2RGB10LE: c->lumToYV12 = rgb30leToY_c; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index ab86037cd4..a5a9bc589a 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -249,6 +249,7 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_NV24]= { 1, 1 }, [AV_PIX_FMT_NV42]= { 1, 1 }, [AV_PIX_FMT_Y210LE] = { 1, 0 }, +[AV_PIX_FMT_Y212LE] = { 1, 0 }, [AV_PIX_FMT_X2RGB10LE] = { 1, 1 }, [AV_PIX_FMT_X2BGR10LE] = { 1, 1 }, [AV_PIX_FMT_P210BE] = { 1, 1 }, diff --git a/libswscale/version.h b/libswscale/version.h index d2880590a6..908995b7b0 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 107 +#define LIBSWSCALE_VERSION_MICRO 108 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ ___
[FFmpeg-cvslog] swscale/input: add support for XV30LE
ffmpeg | branch: master | Philip Langdale | Sun Sep 4 16:32:06 2022 -0700| [198b5b90d5ab1c48aa54e0c6f2b6acd28487b0b3] | committer: Philip Langdale swscale/input: add support for XV30LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=198b5b90d5ab1c48aa54e0c6f2b6acd28487b0b3 --- libswscale/input.c | 25 + libswscale/utils.c | 1 + libswscale/version.h | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/libswscale/input.c b/libswscale/input.c index babedfd541..f4f08c8f72 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -685,6 +685,25 @@ static void read_vuya_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unuse dst[i] = src[i * 4 + 3]; } +static void read_xv30le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, + uint32_t *unused2, void *opq) +{ +int i; +for (i = 0; i < width; i++) +AV_WN16(dst + i * 2, (AV_RL32(src + i * 4) >> 10) & 0x3FFu); +} + + +static void read_xv30le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) +{ +int i; +for (i = 0; i < width; i++) { +AV_WN16(dstU + i * 2, AV_RL32(src + i * 4) & 0x3FFu); +AV_WN16(dstV + i * 2, (AV_RL32(src + i * 4) >> 20) & 0x3FFu); +} +} + static void read_xv36le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { @@ -1390,6 +1409,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_VUYX: c->chrToYV12 = read_vuyx_UV_c; break; +case AV_PIX_FMT_XV30LE: +c->chrToYV12 = read_xv30le_UV_c; +break; case AV_PIX_FMT_AYUV64LE: c->chrToYV12 = read_ayuv64le_UV_c; break; @@ -1777,6 +1799,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_VUYX: c->lumToYV12 = read_vuyx_Y_c; break; +case AV_PIX_FMT_XV30LE: +c->lumToYV12 = read_xv30le_Y_c; +break; case AV_PIX_FMT_AYUV64LE: c->lumToYV12 = read_ayuv64le_Y_c; break; diff --git 
a/libswscale/utils.c b/libswscale/utils.c index a7f77cd39d..ab86037cd4 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -264,6 +264,7 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX]= { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, +[AV_PIX_FMT_XV30LE] = { 1, 0 }, [AV_PIX_FMT_XV36LE] = { 1, 0 }, }; diff --git a/libswscale/version.h b/libswscale/version.h index dd517a1fc6..d2880590a6 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 106 +#define LIBSWSCALE_VERSION_MICRO 107 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] swscale/input: add support for P012
ffmpeg | branch: master | Philip Langdale | Sun Sep 4 15:43:23 2022 -0700| [5bdd7261150db5d254d588f6cf8f038c149e63b5] | committer: Philip Langdale swscale/input: add support for P012 As we now have three of these formats, I added macros to generate the conversion functions. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5bdd7261150db5d254d588f6cf8f038c149e63b5 --- libswscale/input.c | 123 +++ libswscale/utils.c | 2 + libswscale/version.h | 2 +- 3 files changed, 67 insertions(+), 60 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 8032360907..babedfd541 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -749,67 +749,60 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, nvXXtoUV_c(dstV, dstU, src1, width); } -static void p010LEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, -const uint8_t *unused2, int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dst + i * 2, AV_RL16(src + i * 2) >> 6); -} -} - -static void p010BEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, -const uint8_t *unused2, int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dst + i * 2, AV_RB16(src + i * 2) >> 6); +#define p01x_uv_wrapper(bits, shift) \ +static void p0 ## bits ## LEToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused0, \ + const uint8_t *src1, \ + const uint8_t *src2, int width, \ + uint32_t *unused, void *opq) \ +{\ +int i; \ +for (i = 0; i < width; i++) {\ +AV_WN16(dstU + i * 2, AV_RL16(src1 + i * 4 + 0) >> shift); \ +AV_WN16(dstV + i * 2, AV_RL16(src1 + i * 4 + 2) >> shift); \ +}\ +}\ + \ +static void p0 ## bits ## BEToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused0, \ + const uint8_t *src1, \ + const uint8_t *src2, int width, \ + uint32_t *unused, void *opq) \ +{\ +int i; \ +for (i = 0; i < width; i++) {\ +AV_WN16(dstU + i * 2, AV_RB16(src1 + i * 4 + 0) >> shift); \ +AV_WN16(dstV + i * 2, AV_RB16(src1 
+ i * 4 + 2) >> shift); \ +}\ } -} -static void p010LEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dstU + i * 2, AV_RL16(src1 + i * 4 + 0) >> 6); -AV_WN16(dstV + i * 2, AV_RL16(src1 + i * 4 + 2) >> 6); -} -} - -static void p010BEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dstU + i * 2, AV_RB16(src1 + i * 4 + 0) >> 6); -AV_WN16(dstV + i * 2, AV_RB16(src1 + i * 4 + 2) >> 6); -} -} - -static void p016LEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dstU + i * 2, AV_RL16(src1 + i * 4 + 0)); -AV_WN16(dstV + i * 2, AV_RL16(src1 + i * 4 + 2)); -} -} - -static void p016BEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused, void *opq) -{ -int i; -for (i = 0; i < width; i++) { -AV_WN16(dstU + i * 2, AV_RB16(src1 + i * 4 + 0)); -AV_WN16(dstV + i * 2, AV_RB16(src1 + i * 4 + 2)); -} -} +#define p01x_wrapper
[FFmpeg-cvslog] swscale/input: add support for XV36LE
ffmpeg | branch: master | Philip Langdale | Sat Sep 3 15:58:47 2022 -0700| [8d9462844a85b0546c827a5f2c4cc7a1ba49dc9d] | committer: Philip Langdale swscale/input: add support for XV36LE > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8d9462844a85b0546c827a5f2c4cc7a1ba49dc9d --- libswscale/input.c | 25 + libswscale/utils.c | 1 + libswscale/version.h | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/libswscale/input.c b/libswscale/input.c index 92681c9c53..8032360907 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -685,6 +685,25 @@ static void read_vuya_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unuse dst[i] = src[i * 4 + 3]; } +static void read_xv36le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, + uint32_t *unused2, void *opq) +{ +int i; +for (i = 0; i < width; i++) +AV_WN16(dst + i * 2, AV_RL16(src + i * 8 + 2) >> 4); +} + + +static void read_xv36le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) +{ +int i; +for (i = 0; i < width; i++) { +AV_WN16(dstU + i * 2, AV_RL16(src + i * 8 + 0) >> 4); +AV_WN16(dstV + i * 2, AV_RL16(src + i * 8 + 4) >> 4); +} +} + /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. 
*/ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, @@ -1381,6 +1400,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_AYUV64LE: c->chrToYV12 = read_ayuv64le_UV_c; break; +case AV_PIX_FMT_XV36LE: +c->chrToYV12 = read_xv36le_UV_c; +break; case AV_PIX_FMT_P010LE: case AV_PIX_FMT_P210LE: case AV_PIX_FMT_P410LE: @@ -1759,6 +1781,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_AYUV64LE: c->lumToYV12 = read_ayuv64le_Y_c; break; +case AV_PIX_FMT_XV36LE: +c->lumToYV12 = read_xv36le_Y_c; +break; case AV_PIX_FMT_YUYV422: case AV_PIX_FMT_YVYU422: case AV_PIX_FMT_YA8: diff --git a/libswscale/utils.c b/libswscale/utils.c index a621a35862..a67e07b612 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -262,6 +262,7 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX]= { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, +[AV_PIX_FMT_XV36LE] = { 1, 0 }, }; int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, diff --git a/libswscale/version.h b/libswscale/version.h index 17264b45da..403fc8f9e7 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 104 +#define LIBSWSCALE_VERSION_MICRO 105 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavu/hwcontext_vulkan: support mapping VUYX, P012, and XV36
ffmpeg | branch: master | Philip Langdale | Sat Aug 20 09:47:27 2022 -0700| [2f9b8bbd1f415769e1da03571e09c74a78987a3d] | committer: Philip Langdale lavu/hwcontext_vulkan: support mapping VUYX, P012, and XV36 If we want to be able to map between VAAPI and Vulkan (to do Vulkan filtering), we need to have matching formats on each side. The mappings here are not exact. In the same way that P010 is still mapped to full 16 bit formats, P012 has to be mapped that way as well. Similarly, VUYX has to be mapped to an alpha-equipped format, and XV36 has to be mapped to a fully 16bit alpha-equipped format. While Vulkan seems to fundamentally lack formats with an undefined, but physically present, alpha channel, it does have 10X6 and 12X4 formats that you could imagine using for P010, P012 and XV36, but these formats don't support the STORAGE usage flag. Today, hwcontext_vulkan requires all formats to be storable because it wants to be able to use them to create writable images. Until that changes, which might happen, we have to restrict the set of formats we use. Finally, when mapping a Vulkan image back to vaapi, I observed that the VK_FORMAT_R16G16B16A16_UNORM format we have to use for XV36 going to Vulkan is mapped to Y416 when going to vaapi (which makes sense as it's the exact matching format) so I had to add an entry for it even though we don't use it directly. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f9b8bbd1f415769e1da03571e09c74a78987a3d --- libavutil/hwcontext_vulkan.c | 13 + libavutil/version.h | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 237caa4bc0..f1db1c7291 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -173,6 +173,7 @@ static const struct { { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, +{ AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, @@ -210,6 +211,9 @@ static const struct { { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, +{ AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } }, +{ AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } }, + { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, @@ -2629,6 +2633,15 @@ static const struct { { DRM_FORMAT_XRGB, VK_FORMAT_B8G8R8A8_UNORM }, { DRM_FORMAT_ABGR, VK_FORMAT_R8G8B8A8_UNORM }, { DRM_FORMAT_XBGR, VK_FORMAT_R8G8B8A8_UNORM }, + +// All these DRM_FORMATs were added in the same libdrm commit. +#ifdef DRM_FORMAT_XYUV +{ DRM_FORMAT_XYUV, VK_FORMAT_R8G8B8A8_UNORM }, +{ DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R16G16B16A16_UNORM} , +// As we had to map XV36 to a 16bit Vulkan format, reverse mapping will +// end up yielding Y416 as the DRM format, so we need to recognise it. 
+{ DRM_FORMAT_Y416, VK_FORMAT_R16G16B16A16_UNORM }, +#endif }; static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc) diff --git a/libavutil/version.h b/libavutil/version.h index c6e5b9f145..9b8462c705 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 57 #define LIBAVUTIL_VERSION_MINOR 36 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/vaapi: Add support for remaining 10/12bit profiles
ffmpeg | branch: master | Philip Langdale | Fri Aug 19 17:01:07 2022 -0700| [b982dd0d8366b2cb9cc6288b821a536c8e2b50ed] | committer: Philip Langdale lavc/vaapi: Add support for remaining 10/12bit profiles With the necessary pixel formats defined, we can now expose support for the remaining 10/12bit combinations that VAAPI can handle. Specifically, we are adding support for: * HEVC ** 12bit 420 ** 10bit 422 ** 12bit 422 ** 10bit 444 ** 12bit 444 * VP9 ** 10bit 444 ** 12bit 444 These obviously require actual hardware support to be usable, but where that exists, it is now enabled. Note that unlike YUVA/YUVX, the Intel driver does not formally expose support for the alphaless formats XV30 and XV36, and so we are implicitly discarding the alpha from the decoder and passing undefined values for the alpha to the encoder. If a future encoder iteration was to actually do something with the alpha bits, we would need to use a formal alpha capable format or the encoder would need to explicitly accept the alphaless format. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b982dd0d8366b2cb9cc6288b821a536c8e2b50ed --- Changelog | 2 +- libavcodec/hevcdec.c | 8 libavcodec/vaapi_decode.c | 13 + libavcodec/vaapi_encode.c | 4 libavcodec/vaapi_encode_h265.c | 4 libavcodec/vaapi_encode_vp9.c | 1 + libavcodec/vaapi_hevc.c| 11 ++- libavcodec/version.h | 2 +- libavcodec/vp9.c | 2 ++ libavutil/hwcontext_vaapi.c| 25 + 10 files changed, 69 insertions(+), 3 deletions(-) diff --git a/Changelog b/Changelog index 70c12df8dc..f34e8e5d42 100644 --- a/Changelog +++ b/Changelog @@ -8,7 +8,7 @@ version : - ffmpeg now requires threading to be built - ffmpeg now runs every muxer in a separate thread - Add new mode to cropdetect filter to detect crop-area based on motion vectors and edges -- VAAPI decoding and encoding for 8bit 444 HEVC and VP9 +- VAAPI decoding and encoding for 10/12bit 422, 10/12bit 444 HEVC and VP9 - WBMP (Wireless Application Protocol Bitmap) image format - a3dscope filter diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 90961f87be..fb44d8d3f2 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -482,11 +482,19 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif case AV_PIX_FMT_YUV420P12: case AV_PIX_FMT_YUV444P12: +#if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; +#endif #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; +#endif +break; +case AV_PIX_FMT_YUV422P12: +#if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; #endif break; } diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index 8c13a4f098..134f10eca5 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -262,6 +262,9 @@ static const struct { MAP(YUY2, YUYV422), #ifdef VA_FOURCC_Y210 MAP(Y210,Y210), +#endif +#ifdef VA_FOURCC_Y212 +MAP(Y212,Y212), #endif // 4:4:0 MAP(422V, YUV440P), @@ -269,11 +272,20 @@ static const struct { MAP(444P, YUV444P), 
#ifdef VA_FOURCC_XYUV MAP(XYUV, VUYX), +#endif +#ifdef VA_FOURCC_Y410 +MAP(Y410,XV30), +#endif +#ifdef VA_FOURCC_Y412 +MAP(Y412,XV36), #endif // 4:2:0 10-bit #ifdef VA_FOURCC_P010 MAP(P010, P010), #endif +#ifdef VA_FOURCC_P012 +MAP(P012, P012), +#endif #ifdef VA_FOURCC_I010 MAP(I010, YUV420P10), #endif @@ -417,6 +429,7 @@ static const struct { #if VA_CHECK_VERSION(0, 39, 0) MAP(VP9, VP9_1, VP9Profile1 ), MAP(VP9, VP9_2, VP9Profile2 ), +MAP(VP9, VP9_3, VP9Profile3 ), #endif #if VA_CHECK_VERSION(1, 8, 0) MAP(AV1, AV1_MAIN,AV1Profile0), diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c index 2dc5c96f7b..9a58661b51 100644 --- a/libavcodec/vaapi_encode.c +++ b/libavcodec/vaapi_encode.c @@ -1305,7 +1305,11 @@ static const VAAPIEncodeRTFormat vaapi_encode_rt_formats[] = { { "YUV420",VA_RT_FORMAT_YUV420,8, 3, 1, 1 }, { "YUV422",VA_RT_FORMAT_YUV422,8, 3, 1, 0 }, #if VA_CHECK_VERSION(1, 2, 0) +{ "YUV420_12", VA_RT_FORMAT_YUV420_12,12, 3, 1, 1 }, { "YUV422_10", VA_RT_FORMAT_YUV422_10,10, 3, 1, 0 }, +{ "YUV422_12", VA_RT_FORMAT_YUV422_12,12, 3, 1, 0 }, +{ "YUV444_10", VA_RT_FORMAT_YUV444_10,10, 3, 0, 0 }, +{ "YUV444_12", VA_RT_FORMAT_YUV444_12,12, 3, 0, 0 }, #endif { "YUV444",VA_RT_FORMAT_YUV444,8, 3, 0, 0 }, { "XYUV", VA_RT_FORMAT_YUV444,8, 3, 0, 0 }, diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec
[FFmpeg-cvslog] lavu/pixfmt: Add P012, Y212, XV30, and XV36 formats
ffmpeg | branch: master | Philip Langdale | Sat Aug 13 13:50:07 2022 -0700| [d75c4693fef51e8f0a1b88798530f4c5147ea906] | committer: Philip Langdale lavu/pixfmt: Add P012, Y212, XV30, and XV36 formats These are the formats we want/need to use when dealing with the Intel VAAPI decoder for 12bit 4:2:0, 12bit 4:2:2, 10bit 4:4:4 and 12bit 4:4:4 respectively. As with the already supported Y210 and YUVX (XVUY) formats, they are based on formats Microsoft picked as their preferred 4:2:2 and 4:4:4 video formats, and Intel ran with it. P012 and Y212 are simply an extension of 10 bit formats to say 12 bits will be used, with 4 unused bits instead of 6. XV30, and XV36, as exotic as they sound, are variants of Y410 and Y412 where the alpha channel is left formally undefined. We prefer these over the alpha versions because the hardware cannot actually do anything with the alpha channel and respecting it is just overhead. Y412/XV36 is a normal looking packed 4 channel format where each channel is 16bits wide but only the 12msb are used (like P012). Y410/XV30 packs three 10bit channels in 32bits with 2bits of alpha, like A/X2RGB10 style formats. This annoying layout forced me to define the BE version as a bitstream format. It seems like our pixdesc infrastructure can handle the LE version being byte-defined, but not when it's reversed. If there's a better way to handle this, please let me know. Our existing X2 formats all have the 2 bits at the MSB end, but this format places them at the LSB end and that seems to be the root of the problem. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d75c4693fef51e8f0a1b88798530f4c5147ea906 --- doc/APIchanges | 3 ++ libavutil/pixdesc.c | 95 +++- libavutil/pixfmt.h | 16 +++ libavutil/version.h | 2 +- tests/ref/fate/imgutils | 8 tests/ref/fate/sws-pixdesc-query | 38 6 files changed, 160 insertions(+), 2 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index b0d0757b13..729f56be7b 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-09-03 - xx - lavu 57.36.100 - pixfmt.h + Add AV_PIX_FMT_P012, AV_PIX_FMT_Y212, AV_PIX_FMT_XV30, AV_PIX_FMT_XV36 + 2022-09-03 - xx - lavu 57.35.100 - file.h Deprecate av_tempfile() without replacement. diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 79ebfd3f16..d7c6ebfdc4 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2147,6 +2147,30 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, }, +[AV_PIX_FMT_P012LE] = { +.name = "p012le", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 1, +.comp = { +{ 0, 2, 0, 4, 12 },/* Y */ +{ 1, 4, 0, 4, 12 },/* U */ +{ 1, 4, 2, 4, 12 },/* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR, +}, +[AV_PIX_FMT_P012BE] = { +.name = "p012be", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 1, +.comp = { +{ 0, 2, 0, 4, 12 },/* Y */ +{ 1, 4, 0, 4, 12 },/* U */ +{ 1, 4, 2, 4, 12 },/* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, +}, [AV_PIX_FMT_P016LE] = { .name = "p016le", .nb_components = 3, @@ -2543,6 +2567,75 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA | AV_PIX_FMT_FLAG_FLOAT, }, +[AV_PIX_FMT_Y212LE] = { +.name = "y212le", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 0, +.comp = { +{ 0, 4, 0, 4, 12 },/* Y */ +{ 0, 8, 2, 4, 12 },/* U */ +{ 0, 8, 6, 4, 12 },/* V */ +}, +}, 
+[AV_PIX_FMT_Y212BE] = { +.name = "y212be", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 0, +.comp = { +{ 0, 4, 0, 4, 12 },/* Y */ +{ 0, 8, 2, 4, 12 },/* U */ +{ 0, 8, 6, 4, 12 },/* V */ +}, +.flags = AV_PIX_FMT_FLAG_BE, +}, +[AV_PIX_FMT_XV30LE] = { +.name = "xv30le", +.nb_components= 3, +.log2_chroma_w= 0, +.log2_chroma_h= 0, +.comp = { +{ 0, 4, 1, 2, 10 }, /* Y */ +{ 0, 4, 0, 0, 10 }, /* U */ +{ 0, 4, 2, 4, 10 }, /* V */ +}, +}, +[AV_PIX_FMT_XV30BE] = { +.name = "xv30be", +
[FFmpeg-cvslog] lavc/vaapi: Switch preferred 8bit 444 format to VUYX
ffmpeg | branch: master | Philip Langdale | Fri Aug 19 16:55:44 2022 -0700| [caf26a8a126d7b9853568ce0db2f6e04029fd1a2] | committer: Philip Langdale lavc/vaapi: Switch preferred 8bit 444 format to VUYX As vaapi doesn't actually do anything useful with the alpha channel, and we have an alphaless format available, let's use that instead. The changes here are mostly 1:1 switching, but do note the explicit change in the number of declared channels from 4 to 3 to reflect that the alpha is being ignored. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=caf26a8a126d7b9853568ce0db2f6e04029fd1a2 --- libavcodec/vaapi_decode.c | 4 +++- libavcodec/vaapi_encode.c | 2 +- libavcodec/vaapi_encode_h265.c | 3 +-- libavcodec/vaapi_encode_vp9.c | 3 +-- libavcodec/version.h | 2 +- libavutil/hwcontext_vaapi.c| 7 ++- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index bc2d3ed803..8c13a4f098 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -267,7 +267,9 @@ static const struct { MAP(422V, YUV440P), // 4:4:4 MAP(444P, YUV444P), -MAP(AYUV, VUYA), +#ifdef VA_FOURCC_XYUV +MAP(XYUV, VUYX), +#endif // 4:2:0 10-bit #ifdef VA_FOURCC_P010 MAP(P010, P010), diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c index f13daa5cff..2dc5c96f7b 100644 --- a/libavcodec/vaapi_encode.c +++ b/libavcodec/vaapi_encode.c @@ -1308,7 +1308,7 @@ static const VAAPIEncodeRTFormat vaapi_encode_rt_formats[] = { { "YUV422_10", VA_RT_FORMAT_YUV422_10,10, 3, 1, 0 }, #endif { "YUV444",VA_RT_FORMAT_YUV444,8, 3, 0, 0 }, -{ "AYUV", VA_RT_FORMAT_YUV444,8, 4, 0, 0 }, +{ "XYUV", VA_RT_FORMAT_YUV444,8, 3, 0, 0 }, { "YUV411",VA_RT_FORMAT_YUV411,8, 3, 2, 0 }, #if VA_CHECK_VERSION(0, 38, 1) { "YUV420_10", VA_RT_FORMAT_YUV420_10BPP, 10, 3, 1, 1 }, diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c index 1de323af78..967d71e998 100644 --- a/libavcodec/vaapi_encode_h265.c +++ 
b/libavcodec/vaapi_encode_h265.c @@ -1278,8 +1278,7 @@ static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = { #if VA_CHECK_VERSION(1, 2, 0) { FF_PROFILE_HEVC_REXT, 8, 3, 1, 0, VAProfileHEVCMain422_10 }, { FF_PROFILE_HEVC_REXT,10, 3, 1, 0, VAProfileHEVCMain422_10 }, -// Four channels because this uses the AYUV format which has Alpha -{ FF_PROFILE_HEVC_REXT, 8, 4, 0, 0, VAProfileHEVCMain444 }, +{ FF_PROFILE_HEVC_REXT, 8, 3, 0, 0, VAProfileHEVCMain444 }, #endif { FF_PROFILE_UNKNOWN } }; diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c index 9b455e10c9..9530b2f462 100644 --- a/libavcodec/vaapi_encode_vp9.c +++ b/libavcodec/vaapi_encode_vp9.c @@ -228,8 +228,7 @@ static av_cold int vaapi_encode_vp9_configure(AVCodecContext *avctx) static const VAAPIEncodeProfile vaapi_encode_vp9_profiles[] = { { FF_PROFILE_VP9_0, 8, 3, 1, 1, VAProfileVP9Profile0 }, -// Four channels because this uses the AYUV format which has Alpha -{ FF_PROFILE_VP9_1, 8, 4, 0, 0, VAProfileVP9Profile1 }, +{ FF_PROFILE_VP9_1, 8, 3, 0, 0, VAProfileVP9Profile1 }, { FF_PROFILE_VP9_2, 10, 3, 1, 1, VAProfileVP9Profile2 }, { FF_PROFILE_UNKNOWN } }; diff --git a/libavcodec/version.h b/libavcodec/version.h index 12421666b9..d7d5fca6b2 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -30,7 +30,7 @@ #include "version_major.h" #define LIBAVCODEC_VERSION_MINOR 42 -#define LIBAVCODEC_VERSION_MICRO 102 +#define LIBAVCODEC_VERSION_MICRO 103 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c index 2ee5145727..78205425ee 100644 --- a/libavutil/hwcontext_vaapi.c +++ b/libavutil/hwcontext_vaapi.c @@ -125,7 +125,9 @@ static const VAAPIFormatDescriptor vaapi_format_map[] = { MAP(411P, YUV411, YUV411P, 0), MAP(422V, YUV422, YUV440P, 0), MAP(444P, YUV444, YUV444P, 0), -MAP(AYUV, YUV444, VUYA,0), +#ifdef VA_FOURCC_XYUV +MAP(XYUV, YUV444, VUYX,0), +#endif 
MAP(Y800, YUV400, GRAY8, 0), #ifdef VA_FOURCC_P010 MAP(P010, YUV420_10BPP, P010, 0), @@ -1009,6 +1011,9 @@ static const struct { #endif DRM_MAP(ARGB, 1, DRM_FORMAT_BGRA), DRM_MAP(XRGB, 1, DRM_FORMAT_BGRX), +#if defined(VA_FOURCC_XYUV) && defined(DRM_FORMAT_XYUV) +DRM_MAP(XYUV, 1, DRM_FORMAT_XYUV), +#endif }; #undef DRM_MAP ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavu/pixfmt: Introduce VUYX format
ffmpeg | branch: master | Philip Langdale | Fri Aug 19 16:50:44 2022 -0700| [cc5a5c986047d38b53c0f12a227b04487624e7cb] | committer: Philip Langdale lavu/pixfmt: Introduce VUYX format This is the alphaless version of VUYA that I introduced recently. After further discussion and noting that the Intel vaapi driver explicitly lists XYUV as a support format for encoding and decoding 8bit 444 content, we decided to switch our usage and avoid the overhead of having a declared alpha channel around. Note that I am not removing VUYA, as this turned out to have another use, which was to replace the need for v408enc/dec when dealing with the format. The vaapi switching will happen in the next change > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc5a5c986047d38b53c0f12a227b04487624e7cb --- doc/APIchanges | 3 +++ libavutil/pixdesc.c | 11 +++ libavutil/pixfmt.h | 2 ++ libavutil/tests/pixfmt_best.c| 1 + libavutil/version.h | 4 ++-- tests/ref/fate/imgutils | 1 + tests/ref/fate/pixfmt_best | 2 +- tests/ref/fate/sws-pixdesc-query | 3 +++ 8 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 4c0c9db628..1cd13d4ed3 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-08-03 - xx - lavu 57.34.100 - pixfmt.h + Add AV_PIX_FMT_VUYX. + 2022-08-xx - xx - lavf 59 - avformat.h Deprecate av_stream_get_end_pts() without replacement. 
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index f7558ff8b9..79ebfd3f16 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2504,6 +2504,17 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_ALPHA, }, +[AV_PIX_FMT_VUYX] = { +.name = "vuyx", +.nb_components = 3, +.log2_chroma_w = 0, +.log2_chroma_h = 0, +.comp = { +{ 0, 4, 2, 0, 8 },/* Y */ +{ 0, 4, 1, 0, 8 },/* U */ +{ 0, 4, 0, 0, 8 },/* V */ +}, +}, [AV_PIX_FMT_RGBAF16BE] = { .name = "rgbaf16be", .nb_components = 4, diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 86c9bdefeb..7d45561395 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -372,6 +372,8 @@ enum AVPixelFormat { AV_PIX_FMT_RGBAF16BE, ///< IEEE-754 half precision packed RGBA 16:16:16:16, 64bpp, RGBARGBA..., big-endian AV_PIX_FMT_RGBAF16LE, ///< IEEE-754 half precision packed RGBA 16:16:16:16, 64bpp, RGBARGBA..., little-endian +AV_PIX_FMT_VUYX,///< packed VUYX 4:4:4, 32bpp, Variant of VUYA where alpha channel is left undefined + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; diff --git a/libavutil/tests/pixfmt_best.c b/libavutil/tests/pixfmt_best.c index de53baf092..0542af494f 100644 --- a/libavutil/tests/pixfmt_best.c +++ b/libavutil/tests/pixfmt_best.c @@ -84,6 +84,7 @@ int main(void) TEST(AV_PIX_FMT_GBRP, AV_PIX_FMT_RGB24); TEST(AV_PIX_FMT_0RGB, AV_PIX_FMT_RGB24); TEST(AV_PIX_FMT_GBRP16,AV_PIX_FMT_RGB48); +TEST(AV_PIX_FMT_VUYX, AV_PIX_FMT_YUV444P); // Formats additionally containing alpha (here ignored). 
TEST(AV_PIX_FMT_YA8, AV_PIX_FMT_GRAY8); diff --git a/libavutil/version.h b/libavutil/version.h index 05661922b3..5d0df781cc 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,8 +79,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 57 -#define LIBAVUTIL_VERSION_MINOR 33 -#define LIBAVUTIL_VERSION_MICRO 101 +#define LIBAVUTIL_VERSION_MINOR 34 +#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils index 01c9877de5..47b73b1b64 100644 --- a/tests/ref/fate/imgutils +++ b/tests/ref/fate/imgutils @@ -249,3 +249,4 @@ p416le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 vuyaplanes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 rgbaf16be planes: 1, linesizes: 512 0 0 0, plane_sizes: 24576 0 0 0, plane_offsets: 0 0 0, total_size: 24576 rgbaf16le planes: 1, linesizes: 512 0 0 0, plane_sizes: 24576 0 0 0, plane_offsets: 0 0 0, total_size: 24576 +vuyxplanes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 diff --git a/tests/ref/fate/pixfmt_best
[FFmpeg-cvslog] libswscale: add support for VUYX format
ffmpeg | branch: master | Philip Langdale | Fri Aug 19 16:53:37 2022 -0700| [45726aa1177ee7d9d17435f879c96ab3537d8ad3] | committer: Philip Langdale libswscale: add support for VUYX format As we already have support for VUYA, I figured I should do the small amount of work to support VUYX as well. That means a little refactoring to share code. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=45726aa1177ee7d9d17435f879c96ab3537d8ad3 --- libswscale/input.c | 10 + libswscale/output.c | 35 libswscale/utils.c | 1 + libswscale/version.h | 2 +- tests/ref/fate/filter-pixdesc-vuyx | 1 + tests/ref/fate/filter-pixfmts-copy | 1 + tests/ref/fate/filter-pixfmts-crop | 1 + tests/ref/fate/filter-pixfmts-field | 1 + tests/ref/fate/filter-pixfmts-fieldorder | 1 + tests/ref/fate/filter-pixfmts-hflip | 1 + tests/ref/fate/filter-pixfmts-il | 1 + tests/ref/fate/filter-pixfmts-null | 1 + tests/ref/fate/filter-pixfmts-pad| 1 + tests/ref/fate/filter-pixfmts-scale | 1 + tests/ref/fate/filter-pixfmts-transpose | 1 + tests/ref/fate/filter-pixfmts-vflip | 1 + 16 files changed, 51 insertions(+), 9 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 1077d01e91..92681c9c53 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -659,7 +659,7 @@ static void read_ayuv64le_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *u AV_WN16(dst + i * 2, AV_RL16(src + i * 8)); } -static void read_vuya_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, +static void read_vuyx_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { int i; @@ -669,7 +669,7 @@ static void read_vuya_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, } } -static void read_vuya_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, +static void read_vuyx_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t 
*unused1, int width, uint32_t *unused2, void *opq) { int i; @@ -1375,7 +1375,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) break; #endif case AV_PIX_FMT_VUYA: -c->chrToYV12 = read_vuya_UV_c; +case AV_PIX_FMT_VUYX: +c->chrToYV12 = read_vuyx_UV_c; break; case AV_PIX_FMT_AYUV64LE: c->chrToYV12 = read_ayuv64le_UV_c; @@ -1752,7 +1753,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = read_ya16be_gray_c; break; case AV_PIX_FMT_VUYA: -c->lumToYV12 = read_vuya_Y_c; +case AV_PIX_FMT_VUYX: +c->lumToYV12 = read_vuyx_Y_c; break; case AV_PIX_FMT_AYUV64LE: c->lumToYV12 = read_ayuv64le_Y_c; diff --git a/libswscale/output.c b/libswscale/output.c index 74f992ae80..40a4476c6d 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -2585,13 +2585,14 @@ yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter, } static void -yuv2vuya_X_c(SwsContext *c, const int16_t *lumFilter, +yuv2vuyX_X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, int y) + const int16_t **alpSrc, uint8_t *dest, int dstW, int y, + int destHasAlpha) { -int hasAlpha = !!alpSrc; +int hasAlpha = destHasAlpha && (!!alpSrc); int i; for (i = 0; i < dstW; i++) { @@ -2634,10 +2635,33 @@ yuv2vuya_X_c(SwsContext *c, const int16_t *lumFilter, dest[4 * i] = V; dest[4 * i + 1] = U; dest[4 * i + 2] = Y; -dest[4 * i + 3] = A; +if (destHasAlpha) +dest[4 * i + 3] = A; } } +static void +yuv2vuya_X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +{ +yuv2vuyX_X_c(c, lumFilter, lumSrc, lumFilterSize, chrFilter, + chrUSrc, chrVSrc, chrFilterSize, alpSrc, dest, dstW, y, 1); +} + +static void +yuv2vuyx_X_c(SwsContext *c, const 
int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +{ +
[FFmpeg-cvslog] lavc/vaapi_decode: add missing flag when picking best pixel format
ffmpeg | branch: master | Philip Langdale | Thu Aug 4 20:24:48 2022 -0700| [737298b4f7b60bc2b755fe8fa9135f50a496d94d] | committer: Philip Langdale lavc/vaapi_decode: add missing flag when picking best pixel format vaapi_decode_find_best_format currently does not set the VA_SURFACE_ATTRIB_SETTABLE flag on the pixel format attribute that it returns. Without this flag, the attribute will be ignored by vaCreateSurfaces, meaning that the driver's default logic for picking a pixel format will kick in. So far, this hasn't produced visible problems, but when trying to decode 4:4:4 content, at least on Intel, the driver will pick the 444P planar format, even though the decoder can only return the AYUV packed format. The hwcontext_vaapi code that sets surface attributes when picking formats does not have this bug. Applications may use their own logic for finding the best format, and so may not hit this bug. eg: mpv is unaffected. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=737298b4f7b60bc2b755fe8fa9135f50a496d94d --- libavcodec/vaapi_decode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index db48efc3ed..bc2d3ed803 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -358,6 +358,8 @@ static int vaapi_decode_find_best_format(AVCodecContext *avctx, ctx->pixel_format_attribute = (VASurfaceAttrib) { .type = VASurfaceAttribPixelFormat, +.flags = VA_SURFACE_ATTRIB_SETTABLE, +.value.type= VAGenericValueTypeInteger, .value.value.i = best_fourcc, }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/vaapi_encode: enable 8bit 4:4:4 encoding for HEVC and VP9
ffmpeg | branch: master | Philip Langdale | Thu Aug 4 20:35:04 2022 -0700| [109515e16dfffa6bb34de75c5253b7cbb1f12fa6] | committer: Philip Langdale lavc/vaapi_encode: enable 8bit 4:4:4 encoding for HEVC and VP9 Sufficiently recent Intel hardware is able to do encoding of 8bit 4:4:4 content in HEVC and VP9. The main requirement here is that the frames must be provided in the AYUV format. Enabling support is done by adding the appropriate encoding profiles and noting that AYUV is officially a four channel format with alpha so we must state that we expect all four channels. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=109515e16dfffa6bb34de75c5253b7cbb1f12fa6 --- Changelog | 2 +- libavcodec/vaapi_encode.c | 1 + libavcodec/vaapi_encode_h265.c | 2 ++ libavcodec/vaapi_encode_vp9.c | 2 ++ libavcodec/version.h | 2 +- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index cc271c22bd..fa83786a20 100644 --- a/Changelog +++ b/Changelog @@ -8,7 +8,7 @@ version : - ffmpeg now requires threading to be built - ffmpeg now runs every muxer in a separate thread - Add new mode to cropdetect filter to detect crop-area based on motion vectors and edges -- VAAPI hwaccel for 8bit 444 HEVC and VP9 +- VAAPI decoding and encoding for 8bit 444 HEVC and VP9 - WBMP (Wireless Application Protocol Bitmap) image format diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c index 284ce29888..f13daa5cff 100644 --- a/libavcodec/vaapi_encode.c +++ b/libavcodec/vaapi_encode.c @@ -1308,6 +1308,7 @@ static const VAAPIEncodeRTFormat vaapi_encode_rt_formats[] = { { "YUV422_10", VA_RT_FORMAT_YUV422_10,10, 3, 1, 0 }, #endif { "YUV444",VA_RT_FORMAT_YUV444,8, 3, 0, 0 }, +{ "AYUV", VA_RT_FORMAT_YUV444,8, 4, 0, 0 }, { "YUV411",VA_RT_FORMAT_YUV411,8, 3, 2, 0 }, #if VA_CHECK_VERSION(0, 38, 1) { "YUV420_10", VA_RT_FORMAT_YUV420_10BPP, 10, 3, 1, 1 }, diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c index d5375add22..1de323af78 
100644 --- a/libavcodec/vaapi_encode_h265.c +++ b/libavcodec/vaapi_encode_h265.c @@ -1278,6 +1278,8 @@ static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = { #if VA_CHECK_VERSION(1, 2, 0) { FF_PROFILE_HEVC_REXT, 8, 3, 1, 0, VAProfileHEVCMain422_10 }, { FF_PROFILE_HEVC_REXT,10, 3, 1, 0, VAProfileHEVCMain422_10 }, +// Four channels because this uses the AYUV format which has Alpha +{ FF_PROFILE_HEVC_REXT, 8, 4, 0, 0, VAProfileHEVCMain444 }, #endif { FF_PROFILE_UNKNOWN } }; diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c index 892ad770c6..9b455e10c9 100644 --- a/libavcodec/vaapi_encode_vp9.c +++ b/libavcodec/vaapi_encode_vp9.c @@ -228,6 +228,8 @@ static av_cold int vaapi_encode_vp9_configure(AVCodecContext *avctx) static const VAAPIEncodeProfile vaapi_encode_vp9_profiles[] = { { FF_PROFILE_VP9_0, 8, 3, 1, 1, VAProfileVP9Profile0 }, +// Four channels because this uses the AYUV format which has Alpha +{ FF_PROFILE_VP9_1, 8, 4, 0, 0, VAProfileVP9Profile1 }, { FF_PROFILE_VP9_2, 10, 3, 1, 1, VAProfileVP9Profile2 }, { FF_PROFILE_UNKNOWN } }; diff --git a/libavcodec/version.h b/libavcodec/version.h index e488eee355..777be76edf 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -30,7 +30,7 @@ #include "version_major.h" #define LIBAVCODEC_VERSION_MINOR 42 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavu/hwcontext_vaapi: Map the AYUV format
ffmpeg | branch: master | Philip Langdale | Sat Jul 23 11:23:38 2022 -0700| [2b720676e070c9920391ef29d35d8ca6a79bf0c6] | committer: Philip Langdale lavu/hwcontext_vaapi: Map the AYUV format This is the format used by Intel VAAPI for 8bit 4:4:4 content. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2b720676e070c9920391ef29d35d8ca6a79bf0c6 --- libavutil/hwcontext_vaapi.c | 1 + libavutil/version.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c index c3a98bc4b1..2ee5145727 100644 --- a/libavutil/hwcontext_vaapi.c +++ b/libavutil/hwcontext_vaapi.c @@ -125,6 +125,7 @@ static const VAAPIFormatDescriptor vaapi_format_map[] = { MAP(411P, YUV411, YUV411P, 0), MAP(422V, YUV422, YUV440P, 0), MAP(444P, YUV444, YUV444P, 0), +MAP(AYUV, YUV444, VUYA,0), MAP(Y800, YUV400, GRAY8, 0), #ifdef VA_FOURCC_P010 MAP(P010, YUV420_10BPP, P010, 0), diff --git a/libavutil/version.h b/libavutil/version.h index 5095743fed..ee43526dc6 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 57 #define LIBAVUTIL_VERSION_MINOR 32 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/vaapi: Declare support for decoding 8bit 4:4:4 content
ffmpeg | branch: master | Philip Langdale | Sat Jul 23 11:24:33 2022 -0700| [d3f48e68b3236bc3fbf75cc489d53e9f397f5e0a] | committer: Philip Langdale lavc/vaapi: Declare support for decoding 8bit 4:4:4 content Now that we have a combination of capable hardware (new enough Intel) and a mutually understood format ("AYUV"), we can declare support for decoding 8bit 4:4:4 content via VAAPI. This requires listing AYUV as a supported format, and then adding VAAPI as a supported hwaccel for the relevant codecs (HEVC and VP9). I also had to add VP9Profile1 to the set of supported profiles for VAAPI as it was never relevant before. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d3f48e68b3236bc3fbf75cc489d53e9f397f5e0a --- Changelog | 1 + libavcodec/hevcdec.c | 3 +++ libavcodec/vaapi_decode.c | 2 ++ libavcodec/version.h | 2 +- libavcodec/vp9.c | 5 + 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index 40ea7ccecf..d5f3d36094 100644 --- a/Changelog +++ b/Changelog @@ -8,6 +8,7 @@ version : - ffmpeg now requires threading to be built - ffmpeg now runs every muxer in a separate thread - Add new mode to cropdetect filter to detect crop-area based on motion vectors and edges +- VAAPI hwaccel for 8bit 444 HEVC and VP9 version 5.1: diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 9b14fa50f9..539b656c65 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -453,6 +453,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif break; case AV_PIX_FMT_YUV444P: +#if CONFIG_HEVC_VAAPI_HWACCEL +*fmt++ = AV_PIX_FMT_VAAPI; +#endif #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index a7abddb06b..db48efc3ed 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -267,6 +267,7 @@ static const struct { MAP(422V, YUV440P), // 4:4:4 MAP(444P, YUV444P), +MAP(AYUV, VUYA), // 4:2:0 10-bit #ifdef 
VA_FOURCC_P010 MAP(P010, P010), @@ -410,6 +411,7 @@ static const struct { MAP(VP9, VP9_0, VP9Profile0 ), #endif #if VA_CHECK_VERSION(0, 39, 0) +MAP(VP9, VP9_1, VP9Profile1 ), MAP(VP9, VP9_2, VP9Profile2 ), #endif #if VA_CHECK_VERSION(1, 8, 0) diff --git a/libavcodec/version.h b/libavcodec/version.h index 4a1205c718..751f0d2645 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -30,7 +30,7 @@ #include "version_major.h" #define LIBAVCODEC_VERSION_MINOR 41 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index bd2951c92e..db06acd748 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -232,6 +232,11 @@ static int update_size(AVCodecContext *avctx, int w, int h) #endif #if CONFIG_VP9_VDPAU_HWACCEL *fmtp++ = AV_PIX_FMT_VDPAU; +#endif +break; +case AV_PIX_FMT_YUV444P: +#if CONFIG_VP9_VAAPI_HWACCEL +*fmtp++ = AV_PIX_FMT_VAAPI; #endif break; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavu/pixfmt: Add packed 4:4:4 format
ffmpeg | branch: master | Philip Langdale | Sat Jul 23 11:11:59 2022 -0700| [6ab8a9d375ca922b2a94cd7160a4e3c5abe6339c] | committer: Philip Langdale lavu/pixfmt: Add packed 4:4:4 format The "AYUV" format is defined by Microsoft as their preferred format for 4:4:4 content, and so it is the format used by Intel VAAPI and QSV. As Microsoft like to define their byte ordering in little-endian fashion, the memory order is reversed, and so our pix_fmt, which follows memory order, has a reversed name (VUYA). > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6ab8a9d375ca922b2a94cd7160a4e3c5abe6339c --- doc/APIchanges | 3 +++ libavutil/pixdesc.c | 13 + libavutil/pixfmt.h | 2 ++ libavutil/version.h | 2 +- tests/ref/fate/imgutils | 1 + tests/ref/fate/sws-pixdesc-query | 4 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/APIchanges b/doc/APIchanges index 0e9ea4d7c5..0bf2b3cd77 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-08-03 - xx - lavu 57.32.100 - pixfmt.h + Add AV_PIX_FMT_VUYA. + 2022-08-xx - xx - lavc 59.41.100 - avcodec.h codec.h Add AV_CODEC_FLAG_RECON_FRAME and AV_CODEC_CAP_ENCODER_RECON_FRAME. 
avcodec_receive_frame() may now be used on encoders when diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 6e57a82cb6..e078fd5320 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2491,6 +2491,19 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR, }, +[AV_PIX_FMT_VUYA] = { +.name = "vuya", +.nb_components = 4, +.log2_chroma_w = 0, +.log2_chroma_h = 0, +.comp = { +{ 0, 4, 2, 0, 8 },/* Y */ +{ 0, 4, 1, 0, 8 },/* U */ +{ 0, 4, 0, 0, 8 },/* V */ +{ 0, 4, 3, 0, 8 },/* A */ +}, +.flags = AV_PIX_FMT_FLAG_ALPHA, +}, }; static const char * const color_range_names[] = { diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 2d3927cc3f..9d1fdaf82d 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -367,6 +367,8 @@ enum AVPixelFormat { AV_PIX_FMT_P416BE, ///< interleaved chroma YUV 4:4:4, 48bpp, big-endian AV_PIX_FMT_P416LE, ///< interleaved chroma YUV 4:4:4, 48bpp, little-endian +AV_PIX_FMT_VUYA,///< packed VUYA 4:4:4, 32bpp, VUYAVUYA... 
+ AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; diff --git a/libavutil/version.h b/libavutil/version.h index e9eefcdb2c..5095743fed 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 57 -#define LIBAVUTIL_VERSION_MINOR 31 +#define LIBAVUTIL_VERSION_MINOR 32 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils index b23d1b4c39..4ec66febb8 100644 --- a/tests/ref/fate/imgutils +++ b/tests/ref/fate/imgutils @@ -246,3 +246,4 @@ p216be planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 p216le planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288 p416be planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 p416le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432 +vuyaplanes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query index 76104bc5a6..bd0f1fcb82 100644 --- a/tests/ref/fate/sws-pixdesc-query +++ b/tests/ref/fate/sws-pixdesc-query @@ -215,6 +215,7 @@ isYUV: p416le uyvy422 uyyvyy411 + vuya xyz12be xyz12le y210be @@ -654,6 +655,7 @@ ALPHA: rgb32_1 rgba64be rgba64le + vuya ya16be ya16le ya8 @@ -739,6 +741,7 @@ Packed: rgba64le uyvy422 uyyvyy411 + vuya x2bgr10be x2bgr10le x2rgb10be @@ -967,5 +970,6 @@ DataInHighBits: SwappedChroma: nv21 nv42 + vuya yvyu422 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/yadif_cuda: Fix time base for large denominators
ffmpeg | branch: master | Philip Langdale | Sat May 29 13:15:55 2021 -0700| [8f8a7e491da5a4d5f6809dd1e56056c46f0cb123] | committer: Philip Langdale avfilter/yadif_cuda: Fix time base for large denominators This is the same fix applied to regular yadif. Signed-off-by: Philip Langdale > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8f8a7e491da5a4d5f6809dd1e56056c46f0cb123 --- libavfilter/vf_yadif_cuda.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index 4e41c8b554..bbdbfc1adc 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -297,10 +297,9 @@ static int config_output(AVFilterLink *link) goto exit; } -link->time_base.num = ctx->inputs[0]->time_base.num; -link->time_base.den = ctx->inputs[0]->time_base.den * 2; -link->w = ctx->inputs[0]->w; -link->h = ctx->inputs[0]->h; +link->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2}); +link->w = ctx->inputs[0]->w; +link->h = ctx->inputs[0]->h; if(y->mode & 1) link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate, ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/bwdif: Fix time base for large denominators
ffmpeg | branch: master | Philip Langdale | Sat May 29 13:19:26 2021 -0700| [7885ab3036a3f2a5a1f317880a9a2c002e1239ff] | committer: Philip Langdale avfilter/bwdif: Fix time base for large denominators This is the same fix applied to regular yadif. Signed-off-by: Philip Langdale > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7885ab3036a3f2a5a1f317880a9a2c002e1239ff --- libavfilter/vf_bwdif.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c index faba945c7f..49b6e73f11 100644 --- a/libavfilter/vf_bwdif.c +++ b/libavfilter/vf_bwdif.c @@ -335,10 +335,9 @@ static int config_props(AVFilterLink *link) BWDIFContext *s = link->src->priv; YADIFContext *yadif = &s->yadif; -link->time_base.num = link->src->inputs[0]->time_base.num; -link->time_base.den = link->src->inputs[0]->time_base.den * 2; -link->w = link->src->inputs[0]->w; -link->h = link->src->inputs[0]->h; +link->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2}); +link->w = link->src->inputs[0]->w; +link->h = link->src->inputs[0]->h; if(yadif->mode&1) link->frame_rate = av_mul_q(link->src->inputs[0]->frame_rate, (AVRational){2,1}); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/libaom: Support monochrome encoding with libaom >= 2.0.1
ffmpeg | branch: master | Philip Langdale | Mon Dec 7 16:33:29 2020 -0800| [40135829b613f875ce71c2cc2265e74ccc6b4c71] | committer: Philip Langdale avcodec/libaom: Support monochrome encoding with libaom >= 2.0.1 Monochrome encoding with libaom was buggy for a long time, but this was finally sorted out in libaom 2.0.1 (2.0.0 is almost there but was still buggy in realtime mode). We'll keep support for libaom 1.x around until the LTS distros that include it are EOL (which is still a long time from now). Fixes: https://trac.ffmpeg.org/ticket/7599 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=40135829b613f875ce71c2cc2265e74ccc6b4c71 --- Changelog | 1 + libavcodec/libaomenc.c | 42 -- libavcodec/version.h | 2 +- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/Changelog b/Changelog index 503317dfae..8f5e849f8d 100644 --- a/Changelog +++ b/Changelog @@ -51,6 +51,7 @@ version : - asubcut filter - Microsoft Paint (MSP) version 2 decoder - Microsoft Paint (MSP) demuxer +- AV1 monochrome encoding support via libaom >= 2.0.1 version 4.3: diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c index 2b0581b15a..342d0883e4 100644 --- a/libavcodec/libaomenc.c +++ b/libavcodec/libaomenc.c @@ -338,6 +338,9 @@ static int set_pix_fmt(AVCodecContext *avctx, aom_codec_caps_t codec_caps, const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); enccfg->g_bit_depth = enccfg->g_input_bit_depth = desc->comp[0].depth; switch (avctx->pix_fmt) { +case AV_PIX_FMT_GRAY8: +enccfg->monochrome = 1; +/* Fall-through */ case AV_PIX_FMT_YUV420P: enccfg->g_profile = FF_PROFILE_AV1_MAIN; *img_fmt = AOM_IMG_FMT_I420; @@ -351,6 +354,10 @@ static int set_pix_fmt(AVCodecContext *avctx, aom_codec_caps_t codec_caps, enccfg->g_profile = FF_PROFILE_AV1_HIGH; *img_fmt = AOM_IMG_FMT_I444; return 0; +case AV_PIX_FMT_GRAY10: +case AV_PIX_FMT_GRAY12: +enccfg->monochrome = 1; +/* Fall-through */ case AV_PIX_FMT_YUV420P10: case AV_PIX_FMT_YUV420P12: if (codec_caps & 
AOM_CODEC_CAP_HIGHBITDEPTH) { @@ -1158,6 +1165,15 @@ static const enum AVPixelFormat av1_pix_fmts[] = { AV_PIX_FMT_NONE }; +static const enum AVPixelFormat av1_pix_fmts_with_gray[] = { +AV_PIX_FMT_YUV420P, +AV_PIX_FMT_YUV422P, +AV_PIX_FMT_YUV444P, +AV_PIX_FMT_GBRP, +AV_PIX_FMT_GRAY8, +AV_PIX_FMT_NONE +}; + static const enum AVPixelFormat av1_pix_fmts_highbd[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, @@ -1174,13 +1190,35 @@ static const enum AVPixelFormat av1_pix_fmts_highbd[] = { AV_PIX_FMT_NONE }; +static const enum AVPixelFormat av1_pix_fmts_highbd_with_gray[] = { +AV_PIX_FMT_YUV420P, +AV_PIX_FMT_YUV422P, +AV_PIX_FMT_YUV444P, +AV_PIX_FMT_GBRP, +AV_PIX_FMT_YUV420P10, +AV_PIX_FMT_YUV422P10, +AV_PIX_FMT_YUV444P10, +AV_PIX_FMT_YUV420P12, +AV_PIX_FMT_YUV422P12, +AV_PIX_FMT_YUV444P12, +AV_PIX_FMT_GBRP10, +AV_PIX_FMT_GBRP12, +AV_PIX_FMT_GRAY8, +AV_PIX_FMT_GRAY10, +AV_PIX_FMT_GRAY12, +AV_PIX_FMT_NONE +}; + static av_cold void av1_init_static(AVCodec *codec) { +int supports_monochrome = aom_codec_version() >= 20001; aom_codec_caps_t codec_caps = aom_codec_get_caps(aom_codec_av1_cx()); if (codec_caps & AOM_CODEC_CAP_HIGHBITDEPTH) -codec->pix_fmts = av1_pix_fmts_highbd; +codec->pix_fmts = supports_monochrome ? av1_pix_fmts_highbd_with_gray : +av1_pix_fmts_highbd; else -codec->pix_fmts = av1_pix_fmts; +codec->pix_fmts = supports_monochrome ? 
av1_pix_fmts_with_gray : +av1_pix_fmts; if (aom_codec_version_major() < 2) codec->capabilities |= AV_CODEC_CAP_EXPERIMENTAL; diff --git a/libavcodec/version.h b/libavcodec/version.h index 1c10d105f6..5b92afe60a 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 115 -#define LIBAVCODEC_VERSION_MICRO 101 +#define LIBAVCODEC_VERSION_MICRO 102 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/nvdec: Add support for decoding monochrome av1
ffmpeg | branch: master | Philip Langdale | Sat Dec 5 20:25:29 2020 -0800| [67bb11b5f6548c3b273b575f44077db19bb9a98e] | committer: Philip Langdale avcodec/nvdec: Add support for decoding monochrome av1 The nvidia hardware explicitly supports decoding monochrome content, presumably for the AVIF alpha channel. Supporting this requires an adjustment in av1dec and explicit monochrome detection in nvdec. I'm not sure why the monochrome path in av1dec did what it did - it seems non-functional - YUV440P doesn't seem a logical pix_fmt for monochrome and conditioning on chroma sub-sampling doesn't make sense. So I changed it. I've tested 8bit content, but I haven't found a way to create a 10bit sample, so that path is untested for now. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=67bb11b5f6548c3b273b575f44077db19bb9a98e --- libavcodec/av1dec.c | 19 --- libavcodec/nvdec.c | 3 +++ libavcodec/version.h | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index d7b2ac9d46..bc897af9cf 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -387,9 +387,12 @@ static int get_pixel_format(AVCodecContext *avctx) av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n"); } } else { -if (seq->color_config.subsampling_x == 1 && -seq->color_config.subsampling_y == 1) -pix_fmt = AV_PIX_FMT_YUV440P; +if (bit_depth == 8) +pix_fmt = AV_PIX_FMT_GRAY8; +else if (bit_depth == 10) +pix_fmt = AV_PIX_FMT_GRAY10; +else if (bit_depth == 12) +pix_fmt = AV_PIX_FMT_GRAY12; else av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n"); } @@ -430,6 +433,16 @@ static int get_pixel_format(AVCodecContext *avctx) #endif #if CONFIG_AV1_VAAPI_HWACCEL *fmtp++ = AV_PIX_FMT_VAAPI; +#endif +break; +case AV_PIX_FMT_GRAY8: +#if CONFIG_AV1_NVDEC_HWACCEL +*fmtp++ = AV_PIX_FMT_CUDA; +#endif +break; +case AV_PIX_FMT_GRAY10: +#if CONFIG_AV1_NVDEC_HWACCEL +*fmtp++ = AV_PIX_FMT_CUDA; #endif break; } diff --git a/libavcodec/nvdec.c 
b/libavcodec/nvdec.c index 48281293ce..23c84d9acf 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -83,6 +83,9 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt) { int shift_h = 0, shift_v = 0; +if (av_pix_fmt_count_planes(pix_fmt) == 1) +return cudaVideoChromaFormat_Monochrome; + av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v); if (shift_h == 1 && shift_v == 1) diff --git a/libavcodec/version.h b/libavcodec/version.h index 4ee221b7f2..1c10d105f6 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 115 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] configure: Add additional glslang libraries to make linking work
ffmpeg | branch: master | Philip Langdale | Mon Aug 17 16:19:39 2020 -0700| [98ea1a662ee172961feaa374fe8d26078838d250] | committer: Philip Langdale configure: Add additional glslang libraries to make linking work The latest builds of glslang introduce new libraries that need to be linked for all symbols to be fully resolved. This change will break building against older installations of glslang and it's very hard to tell them apart as the library change upstream was not accompanied by any version bump and no official release has been made with this change it - just lots of people packaging up git snapshots. So, apologies in advance. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=98ea1a662ee172961feaa374fe8d26078838d250 --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 9ff246b07f..f13d1a8c9a 100755 --- a/configure +++ b/configure @@ -6347,7 +6347,7 @@ enabled fontconfig&& enable libfontconfig enabled libfontconfig && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit enabled libfreetype && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType enabled libfribidi&& require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info -enabled libglslang&& require_cpp libglslang glslang/SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lOSDependent -lHLSL -lOGLCompiler -lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ +enabled libglslang&& require_cpp libglslang glslang/SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen -lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ enabled libgme&& { check_pkg_config libgme libgme gme/gme.h gme_new_emu || require libgme gme/gme.h gme_new_emu -lgme -lstdc++; } enabled libgsm&& { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org 
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/hwcontext_vdpau: Correctly initialise pixfmts
ffmpeg | branch: master | Philip Langdale | Sat Jul 11 20:23:03 2020 -0700| [93febc4e15f78277f8532597c76c8092e65af71f] | committer: Philip Langdale avutil/hwcontext_vdpau: Correctly initialise pixfmts The number of declared vdpau formats can vary depending on which version of libvdpau we build against, so the number of pix fmts can vary too. Let's make sure we keep those numbers in sync. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=93febc4e15f78277f8532597c76c8092e65af71f --- libavutil/hwcontext_vdpau.c | 42 +- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/libavutil/hwcontext_vdpau.c b/libavutil/hwcontext_vdpau.c index dbef5495af..8f014eaba7 100644 --- a/libavutil/hwcontext_vdpau.c +++ b/libavutil/hwcontext_vdpau.c @@ -32,27 +32,6 @@ #include "pixfmt.h" #include "pixdesc.h" -typedef struct VDPAUDeviceContext { -VdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities *get_transfer_caps; -VdpVideoSurfaceGetBitsYCbCr *get_data; -VdpVideoSurfacePutBitsYCbCr *put_data; -VdpVideoSurfaceCreate *surf_create; -VdpVideoSurfaceDestroy *surf_destroy; - -enum AVPixelFormat *pix_fmts[8]; -int nb_pix_fmts[8]; -} VDPAUDeviceContext; - -typedef struct VDPAUFramesContext { -VdpVideoSurfaceGetBitsYCbCr *get_data; -VdpVideoSurfacePutBitsYCbCr *put_data; -VdpChromaType chroma_type; -int chroma_idx; - -const enum AVPixelFormat *pix_fmts; -int nb_pix_fmts; -} VDPAUFramesContext; - typedef struct VDPAUPixFmtMap { VdpYCbCrFormat vdpau_fmt; enum AVPixelFormat pix_fmt; @@ -103,6 +82,27 @@ static const struct { #endif }; +typedef struct VDPAUDeviceContext { +VdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities *get_transfer_caps; +VdpVideoSurfaceGetBitsYCbCr *get_data; +VdpVideoSurfacePutBitsYCbCr *put_data; +VdpVideoSurfaceCreate *surf_create; +VdpVideoSurfaceDestroy *surf_destroy; + +enum AVPixelFormat *pix_fmts[FF_ARRAY_ELEMS(vdpau_pix_fmts)]; +int nb_pix_fmts[FF_ARRAY_ELEMS(vdpau_pix_fmts)]; +} VDPAUDeviceContext; + +typedef struct VDPAUFramesContext { 
+VdpVideoSurfaceGetBitsYCbCr *get_data; +VdpVideoSurfacePutBitsYCbCr *put_data; +VdpChromaType chroma_type; +int chroma_idx; + +const enum AVPixelFormat *pix_fmts; +int nb_pix_fmts; +} VDPAUFramesContext; + static int count_pixfmts(const VDPAUPixFmtMap *map) { int count = 0; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec: Add explicit capability flag for encoder flushing
ffmpeg | branch: master | Philip Langdale | Fri Apr 10 13:32:11 2020 -0700| [22b25b3ea5cf5e241e8dde5ddd107a3b1e6eb7a0] | committer: Philip Langdale avcodec: Add explicit capability flag for encoder flushing Previously, there was no way to flush an encoder such that after draining, the encoder could be used again. We generally suggested that clients teardown and replace the encoder instance in these situations. However, for at least some hardware encoders, the cost of this tear down/replace cycle is very high, which can get in the way of some use-cases - for example: segmented encoding with nvenc. To help address that use case, we added support for calling avcodec_flush_buffers() to nvenc and things worked in practice, although it was not clearly documented as to whether this should work or not. There was only one previous example of an encoder implementing the flush callback (audiotoolboxenc) and it's unclear if that was intentional or not. However, it was clear that calling avocdec_flush_buffers() on any other encoder would leave the encoder in an undefined state, and that's not great. As part of cleaning this up, this change introduces a formal capability flag for encoders that support flushing and ensures a flush call is a no-op for any other encoder. This allows client code to check if it is meaningful to call flush on an encoder before actually doing it. I have not attempted to separate the steps taken inside avcodec_flush_buffers() because it's not doing anything that's wrong for an encoder. But I did add a sanity check to reject attempts to flush a frame threaded encoder because I couldn't wrap my head around whether that code path was actually safe or not. As this combination doesn't exist today, we'll deal with it if it ever comes up. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=22b25b3ea5cf5e241e8dde5ddd107a3b1e6eb7a0 --- doc/APIchanges | 6 ++ libavcodec/audiotoolboxenc.c | 3 ++- libavcodec/avcodec.h | 25 - libavcodec/decode.c | 15 +++ libavcodec/nvenc_h264.c | 11 --- libavcodec/nvenc_hevc.c | 6 -- libavcodec/version.h | 4 ++-- 7 files changed, 57 insertions(+), 13 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index 4cc2367e69..e30148dc90 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,12 @@ libavutil: 2017-10-21 API changes, most recent first: +2020-04-15 - xx - lavc 58.79.100 - avcodec.h + Add formal support for calling avcodec_flush_buffers() on encoders. + Encoders that set the cap AV_CODEC_CAP_ENCODER_FLUSH will be flushed. + For all other encoders, the call is now a no-op rather than undefined + behaviour. + 2020-xx-xx - xx - lavc 58.78.100 - avcodec.h codec_desc.h codec_id.h packet.h Move AVCodecDesc-related public API to new header codec_desc.h. Move AVCodecID enum to new header codec_id.h. diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c index 2c1891693e..27632decf5 100644 --- a/libavcodec/audiotoolboxenc.c +++ b/libavcodec/audiotoolboxenc.c @@ -627,7 +627,8 @@ static const AVOption options[] = { .encode2= ffat_encode, \ .flush = ffat_encode_flush, \ .priv_class = _##NAME##_enc_class, \ -.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \ +.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | \ + AV_CODEC_CAP_ENCODER_FLUSH __VA_ARGS__, \ .sample_fmts= (const enum AVSampleFormat[]) { \ AV_SAMPLE_FMT_S16, \ AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_NONE \ diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 55151a0b71..b79b025e53 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -513,6 +513,13 @@ typedef struct RcOverride{ */ #define AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE (1 << 20) +/** + * This encoder can be flushed using avcodec_flush_buffers(). 
If this flag is + * not set, the encoder must be closed and reopened to ensure that no frames + * remain pending. + */ +#define AV_CODEC_CAP_ENCODER_FLUSH (1 << 21) + /* Exported side data. These flags can be passed in AVCodecContext.export_side_data before initialization. */ @@ -4473,13 +4480,21 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, int buf_size, int align); /** - * Reset the internal decoder state / flush internal buffers. Should be called + * Reset the internal codec state / flush internal buffers. Should be called * e.g. when seeking or when switching to a different stream. * - * @note when refcounted frames are not used (i.e. avctx->refcounted_frames is 0), - * this invalidates the frames previously returned from the decoder. When - * refcounted frames are used, the decoder just releases any references it might - * keep internally, but the caller's refer
[FFmpeg-cvslog] Changelog: Add entry for expanded styling support in movtext
ffmpeg | branch: master | Philip Langdale | Fri Apr 10 09:30:53 2020 -0700| [7b0c22768e5b54b7932fcff0af4ccf59f0c235f2] | committer: Philip Langdale Changelog: Add entry for expanded styling support in movtext > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b0c22768e5b54b7932fcff0af4ccf59f0c235f2 --- Changelog| 1 + libavcodec/version.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index b0c016185e..6dfe750d81 100644 --- a/Changelog +++ b/Changelog @@ -57,6 +57,7 @@ version : - overlay_cuda filter - switch from AvxSynth to AviSynth+ on Linux - mv30 decoder +- Expanded styling support for 3GPP Timed Text Subtitles (movtext) version 4.2: diff --git a/libavcodec/version.h b/libavcodec/version.h index e62d1a7925..278f6be0cf 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 78 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavu/hwcontext_cuda: refactor context initialisation
ffmpeg | branch: master | Philip Langdale | Tue Dec 31 09:41:57 2019 -0800| [7f149b04520c01002a2c199f85616ec185896839] | committer: Lynne lavu/hwcontext_cuda: refactor context initialisation There's enough going on here now that it should not be duplicated between cuda_device_create and cuda_device_derive. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7f149b04520c01002a2c199f85616ec185896839 --- libavutil/hwcontext_cuda.c | 114 - 1 file changed, 50 insertions(+), 64 deletions(-) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index ba8ca589f7..a87c280cf7 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -336,57 +336,44 @@ error: return ret; } -static int cuda_device_create(AVHWDeviceContext *device_ctx, - const char *device, - AVDictionary *opts, int flags) -{ +static int cuda_context_init(AVHWDeviceContext *device_ctx, int flags) { AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; CUcontext dummy; -int ret, dev_active = 0, device_idx = 0; +int ret, dev_active = 0; unsigned int dev_flags = 0; const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; -if (device) -device_idx = strtol(device, NULL, 0); - -if (cuda_device_init(device_ctx) < 0) -goto error; - cu = hwctx->internal->cuda_dl; -ret = CHECK_CU(cu->cuInit(0)); -if (ret < 0) -goto error; - -ret = CHECK_CU(cu->cuDeviceGet(>internal->cuda_device, device_idx)); -if (ret < 0) -goto error; - hwctx->internal->flags = flags; if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { -ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, _flags, _active)); +ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, + _flags, _active)); if (ret < 0) -goto error; +return ret; if (dev_active && dev_flags != desired_flags) { av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); -goto error; +return AVERROR(ENOTSUP); } else if (dev_flags != desired_flags) { -ret = 
CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); +ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, + desired_flags)); if (ret < 0) -goto error; +return ret; } -ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(>cuda_ctx, hwctx->internal->cuda_device)); +ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(>cuda_ctx, + hwctx->internal->cuda_device)); if (ret < 0) -goto error; +return ret; } else { -ret = CHECK_CU(cu->cuCtxCreate(>cuda_ctx, desired_flags, hwctx->internal->cuda_device)); +ret = CHECK_CU(cu->cuCtxCreate(>cuda_ctx, desired_flags, + hwctx->internal->cuda_device)); if (ret < 0) -goto error; +return ret; CHECK_CU(cu->cuCtxPopCurrent()); } @@ -397,6 +384,37 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, hwctx->stream = NULL; return 0; +} + +static int cuda_device_create(AVHWDeviceContext *device_ctx, + const char *device, + AVDictionary *opts, int flags) +{ +AVCUDADeviceContext *hwctx = device_ctx->hwctx; +CudaFunctions *cu; +int ret, device_idx = 0; + +if (device) +device_idx = strtol(device, NULL, 0); + +if (cuda_device_init(device_ctx) < 0) +goto error; + +cu = hwctx->internal->cuda_dl; + +ret = CHECK_CU(cu->cuInit(0)); +if (ret < 0) +goto error; + +ret = CHECK_CU(cu->cuDeviceGet(>internal->cuda_device, device_idx)); +if (ret < 0) +goto error; + +ret = cuda_context_init(device_ctx, flags); +if (ret < 0) +goto error; + +return 0; error: cuda_device_uninit(device_ctx); @@ -409,11 +427,7 @@ static int cuda_device_derive(AVHWDeviceContext *device_ctx, AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; const char *src_uuid = NULL; -CUcontext dummy; -int ret, i, device_count, dev_active = 0; -unsigned int dev_flags = 0; - -const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; +int ret, i, device_count; #if CONFIG_VULKAN VkPhysicalDeviceIDProperties vk_idp = { @@ -481,37 +495,9 @@ stat
[FFmpeg-cvslog] lavu/hwcontext: Add support for HW -> HW transfers
ffmpeg | branch: master | Philip Langdale | Wed Oct 23 18:01:52 2019 -0700| [d7210ce7f5418508d6f8eec6e90d978e06a2d49e] | committer: Lynne lavu/hwcontext: Add support for HW -> HW transfers We are beginning to consider scenarios where a given HW Context may be able to transfer frames to another HW Context without passing via system memory - this would usually be when two contexts represent different APIs on the same device (eg: Vulkan and CUDA). This is modelled as a transfer, as we have today, but where both the src and the dst are hardware frames with hw contexts. We need to be careful to ensure the contexts are compatible - particularly, we cannot do transfers where one of the frames has been mapped via a derived frames context - we can only do transfers for frames that were directly allocated by the specified context. Additionally, as we have two hardware contexts, the transfer function could be implemented by either (or indeed both). To handle this uncertainty, we explicitly look for ENOSYS as an indicator to try the transfer in the other direction before giving up. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d7210ce7f5418508d6f8eec6e90d978e06a2d49e --- libavutil/hwcontext.c | 53 +-- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index f1e404ab20..3189391c07 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -444,21 +444,54 @@ int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags) if (!dst->buf[0]) return transfer_data_alloc(dst, src, flags); -if (src->hw_frames_ctx) { -ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; +/* + * Hardware -> Hardware Transfer. + * Unlike Software -> Hardware or Hardware -> Software, the transfer + * function could be provided by either the src or dst, depending on + * the specific combination of hardware. 
+ */ +if (src->hw_frames_ctx && dst->hw_frames_ctx) { +AVHWFramesContext *src_ctx = +(AVHWFramesContext*)src->hw_frames_ctx->data; +AVHWFramesContext *dst_ctx = +(AVHWFramesContext*)dst->hw_frames_ctx->data; + +if (src_ctx->internal->source_frames) { +av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the source of a HW -> HW transfer."); +return AVERROR(ENOSYS); +} -ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); -if (ret < 0) -return ret; -} else if (dst->hw_frames_ctx) { -ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; +if (dst_ctx->internal->source_frames) { +av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the destination of a HW -> HW transfer."); +return AVERROR(ENOSYS); +} -ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); +ret = src_ctx->internal->hw_type->transfer_data_from(src_ctx, dst, src); +if (ret == AVERROR(ENOSYS)) +ret = dst_ctx->internal->hw_type->transfer_data_to(dst_ctx, dst, src); if (ret < 0) return ret; -} else -return AVERROR(ENOSYS); +} else { +if (src->hw_frames_ctx) { +ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; + +ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); +if (ret < 0) +return ret; +} else if (dst->hw_frames_ctx) { +ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; +ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); +if (ret < 0) +return ret; +} else { +return AVERROR(ENOSYS); +} +} return 0; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavfi/vf_hwupload: Add support for HW -> HW transfers
ffmpeg | branch: master | Philip Langdale | Wed Oct 23 18:11:37 2019 -0700| [88d2ccbe9384a1cba44a2909ef896aac50636d11] | committer: Lynne lavfi/vf_hwupload: Add support for HW -> HW transfers As we find ourselves wanting a way to transfer frames between HW devices (or more realistically, between APIs on the same device), it's desirable to have a way to describe the relationship. While we could imagine introducing a `hwtransfer` filter, there is almost no difference from `hwupload`. The main new feature we need is a way to specify the target device. Having a single device for the filter chain is obviously insufficient if we're dealing with two devices. So let's add a way to specify the upload target device, and if none is specified, continue with the existing behaviour. We must also correctly preserve the sw_format on such a transfer. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=88d2ccbe9384a1cba44a2909ef896aac50636d11 --- doc/filters.texi | 13 ++- libavfilter/vf_hwupload.c | 51 +++--- libavfilter/vf_hwupload_cuda.c | 10 - 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 9b5f4543b8..96dccc45be 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -11981,7 +11981,18 @@ Upload system memory frames to hardware surfaces. The device to upload to must be supplied when the filter is initialised. If using ffmpeg, select the appropriate device with the @option{-filter_hw_device} -option. +option or with the @option{derive_device} option. The input and output devices +must be of different types and compatible - the exact meaning of this is +system-dependent, but typically it means that they must refer to the same +underlying hardware context (for example, refer to the same graphics card). 
+ +The following additional parameters are accepted: + +@table @option +@item derive_device @var{type} +Rather than using the device supplied at initialisation, instead derive a new +device of type @var{type} from the device the input frames exist on. +@end table @anchor{hwupload_cuda} @section hwupload_cuda diff --git a/libavfilter/vf_hwupload.c b/libavfilter/vf_hwupload.c index 50bc7e10f6..7c5dd497b0 100644 --- a/libavfilter/vf_hwupload.c +++ b/libavfilter/vf_hwupload.c @@ -32,10 +32,11 @@ typedef struct HWUploadContext { const AVClass *class; AVBufferRef *hwdevice_ref; -AVHWDeviceContext *hwdevice; AVBufferRef *hwframes_ref; AVHWFramesContext *hwframes; + +char *device_type; } HWUploadContext; static int hwupload_query_formats(AVFilterContext *avctx) @@ -46,17 +47,27 @@ static int hwupload_query_formats(AVFilterContext *avctx) AVFilterFormats *input_formats = NULL; int err, i; -if (!avctx->hw_device_ctx) { +if (ctx->hwdevice_ref) { +/* We already have a specified device. */ +} else if (avctx->hw_device_ctx) { +if (ctx->device_type) { +err = av_hwdevice_ctx_create_derived( +>hwdevice_ref, +av_hwdevice_find_type_by_name(ctx->device_type), +avctx->hw_device_ctx, 0); +if (err < 0) +return err; +} else { +ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); +if (!ctx->hwdevice_ref) +return AVERROR(ENOMEM); +} +} else { av_log(ctx, AV_LOG_ERROR, "A hardware device reference is required " "to upload frames to.\n"); return AVERROR(EINVAL); } -ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); -if (!ctx->hwdevice_ref) -return AVERROR(ENOMEM); -ctx->hwdevice = (AVHWDeviceContext*)ctx->hwdevice_ref->data; - constraints = av_hwdevice_get_hwframe_constraints(ctx->hwdevice_ref, NULL); if (!constraints) { err = AVERROR(EINVAL); @@ -127,7 +138,13 @@ static int hwupload_config_output(AVFilterLink *outlink) av_get_pix_fmt_name(inlink->format)); ctx->hwframes->format= outlink->format; -ctx->hwframes->sw_format = inlink->format; +if (inlink->hw_frames_ctx) { 
+AVHWFramesContext *in_hwframe_ctx = +(AVHWFramesContext*)inlink->hw_frames_ctx->data; +ctx->hwframes->sw_format = in_hwframe_ctx->sw_format; +} else { +ctx->hwframes->sw_format = inlink->format; +} ctx->hwframes->width = inlink->w; ctx->hwframes->height= inlink->h; @@ -200,13 +217,21 @@ static av_cold void hwupload_uninit(AVFilterContext *avctx) av_buffer_unref(>hwdevice_ref); } -static const AVClass hwupload_class = { -.class_name = "hwupload", -.item_name = av_default_item_name, -.option = NULL, -.version= LIB
[FFmpeg-cvslog] nvenc: implement flush to help allow an encoder to be re-used
ffmpeg | branch: master | Philip Langdale | Fri Dec 20 15:34:33 2019 -0800| [3ea705767720033754e8d85566460390191ae27d] | committer: Philip Langdale nvenc: implement flush to help allow an encoder to be re-used It can be useful to re-use an encoder instance when doing segmented encodings, and this requires flushing the encoder at the start of each segment. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3ea705767720033754e8d85566460390191ae27d --- libavcodec/nvenc.c | 5 + libavcodec/nvenc.h | 2 ++ libavcodec/nvenc_h264.c | 1 + libavcodec/nvenc_hevc.c | 1 + libavcodec/version.h| 2 +- 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 310e30805d..9a96bf2bba 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -2262,3 +2262,8 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, return 0; } + +av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx) +{ +ff_nvenc_send_frame(avctx, NULL); +} diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index a269bd97bb..c44c81e675 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -214,6 +214,8 @@ int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt); int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet); +void ff_nvenc_encode_flush(AVCodecContext *avctx); + extern const enum AVPixelFormat ff_nvenc_pix_fmts[]; #endif /* AVCODEC_NVENC_H */ diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index d5c7370aaa..479155fe15 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -240,6 +240,7 @@ AVCodec ff_h264_nvenc_encoder = { .receive_packet = ff_nvenc_receive_packet, .encode2= ff_nvenc_encode_frame, .close = ff_nvenc_encode_close, +.flush = ff_nvenc_encode_flush, .priv_data_size = sizeof(NvencContext), .priv_class = _nvenc_class, .defaults = defaults, diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index c668b97f86..7c9b3848f1 100644 
--- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -198,6 +198,7 @@ AVCodec ff_hevc_nvenc_encoder = { .receive_packet = ff_nvenc_receive_packet, .encode2= ff_nvenc_encode_frame, .close = ff_nvenc_encode_close, +.flush = ff_nvenc_encode_flush, .priv_data_size = sizeof(NvencContext), .priv_class = _nvenc_class, .defaults = defaults, diff --git a/libavcodec/version.h b/libavcodec/version.h index 77da913df0..1a88432460 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 65 -#define LIBAVCODEC_VERSION_MICRO 102 +#define LIBAVCODEC_VERSION_MICRO 103 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avformat/hls: Set AVFMT_TS_DISCONT flag on HLS input format
ffmpeg | branch: master | Philip Langdale | Sun Oct 27 11:39:08 2019 -0700| [d6ac6650b911f0957e69545d7fc25be6b7728705] | committer: Steven Liu avformat/hls: Set AVFMT_TS_DISCONT flag on HLS input format There have been many reports over the years about problems when taking an HLS stream as input to `ffmpeg` where there are timestamp discontinuities present. This is explicitly supported in the HLS spec (EXT-X-DISCONTINUITY) and often used for ad injection. Various fixes and work-arounds have been proposed over the years, but one step that seems obvious, even if it's not a complete fix, is to mark the HLS input format as supporting discontinuities. This will prevent timestamp fixup logic in ffmpeg.c kicking in that ends up mangling the timestamps unnecessarily. I've tested this out with an example provided by Joe Koberg early last year, and it is sufficient to allow `ffmpeg` to download and mux the stream correctly. Joe had briefly suggested that other situations can still be handled incorrectly, but this seems like a strict improvement. 
Joe's example: https://s3.amazonaws.com/playon-test-videos/discont_test_new/discont_test.m3u8 Reviewed-by: Steven Liu Reviewed-by: Dennis Mungai > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d6ac6650b911f0957e69545d7fc25be6b7728705 --- libavformat/hls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/hls.c b/libavformat/hls.c index d7f4d5b442..ac151d5ca4 100644 --- a/libavformat/hls.c +++ b/libavformat/hls.c @@ -2326,7 +2326,7 @@ AVInputFormat ff_hls_demuxer = { .long_name = NULL_IF_CONFIG_SMALL("Apple HTTP Live Streaming"), .priv_class = _class, .priv_data_size = sizeof(HLSContext), -.flags = AVFMT_NOGENSEARCH, +.flags = AVFMT_NOGENSEARCH | AVFMT_TS_DISCONT, .read_probe = hls_probe, .read_header= hls_read_header, .read_packet= hls_read_packet, ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/vf_scale_cuda: Simplify output plane addressing
ffmpeg | branch: master | Philip Langdale | Mon May 13 20:07:42 2019 -0700| [13168ecbdc03c25065f2d1394aa6f2447b33600e] | committer: Philip Langdale avfilter/vf_scale_cuda: Simplify output plane addressing I'm not sure why this was written the way it was originally. We initialise the plane addresses correctly in hwcontext_cuda so why try and play games to calculate the plane offsets directly in this code? > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=13168ecbdc03c25065f2d1394aa6f2447b33600e --- libavfilter/vf_scale_cuda.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index a833dcd1a4..b7cdb81081 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -390,12 +390,12 @@ static int scalecuda_resize(AVFilterContext *ctx, out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, - in->data[0]+in->linesize[0]*in->height, in->width/2, in->height/2, in->linesize[0]/2, - out->data[0]+out->linesize[0]*out->height, out->width/2, out->height/2, out->linesize[0]/2, + in->data[1], in->width/2, in->height/2, in->linesize[0]/2, + out->data[1], out->width/2, out->height/2, out->linesize[0]/2, 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, - in->data[0]+ ALIGN_UP((in->linesize[0]*in->height*5)/4, s->tex_alignment), in->width/2, in->height/2, in->linesize[0]/2, - out->data[0]+(out->linesize[0]*out->height*5)/4, out->width/2, out->height/2, out->linesize[0]/2, + in->data[2], in->width/2, in->height/2, in->linesize[0]/2, + out->data[2], out->width/2, out->height/2, out->linesize[0]/2, 1); break; case AV_PIX_FMT_YUV444P: @@ -404,12 +404,12 @@ static int scalecuda_resize(AVFilterContext *ctx, out->data[0], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, - in->data[0]+in->linesize[0]*in->height, in->width, in->height, in->linesize[0], - 
out->data[0]+out->linesize[0]*out->height, out->width, out->height, out->linesize[0], + in->data[1], in->width, in->height, in->linesize[0], + out->data[1], out->width, out->height, out->linesize[0], 1); call_resize_kernel(ctx, s->cu_func_uchar, 1, - in->data[0]+in->linesize[0]*in->height*2, in->width, in->height, in->linesize[0], - out->data[0]+out->linesize[0]*out->height*2, out->width, out->height, out->linesize[0], + in->data[2], in->width, in->height, in->linesize[0], + out->data[2], out->width, out->height, out->linesize[0], 1); break; case AV_PIX_FMT_YUV444P16: @@ -433,7 +433,7 @@ static int scalecuda_resize(AVFilterContext *ctx, 1); call_resize_kernel(ctx, s->cu_func_uchar2, 2, in->data[1], in->width/2, in->height/2, in->linesize[1], - out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width/2, out->height/2, out->linesize[1]/2, + out->data[1], out->width/2, out->height/2, out->linesize[1]/2, 1); break; case AV_PIX_FMT_P010LE: @@ -443,7 +443,7 @@ static int scalecuda_resize(AVFilterContext *ctx, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2, in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2, - out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4, + out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4, 2); break; case AV_PIX_FMT_P016LE: @@ -453,7 +453,7 @@ static int scalecuda_resize(AVFilterContext *ctx, 2); call_resize_kernel(ctx, s->cu_func_ushort2, 2,
[FFmpeg-cvslog] avfilter/vf_scale_cuda: Fix incorrect scaling of > 8bit content
ffmpeg | branch: master | Philip Langdale | Mon May 13 19:15:41 2019 -0700| [89bd7554b209a447062c306129d3f36d673cbd4d] | committer: Philip Langdale avfilter/vf_scale_cuda: Fix incorrect scaling of > 8bit content When I converted the filter to use texture objects instead of texture references, I incorrectly dropped the `pixel_size` scaling factor when setting `pitchInBytes`. `src_pitch` is in pixels and so must be scaled up. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=89bd7554b209a447062c306129d3f36d673cbd4d --- libavfilter/vf_scale_cuda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index c97a802ddc..ecfd6a1c92 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -357,7 +357,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel .res.pitch2D.numChannels = channels, .res.pitch2D.width = src_width, .res.pitch2D.height = src_height, -.res.pitch2D.pitchInBytes = src_pitch, +.res.pitch2D.pitchInBytes = src_pitch * pixel_size, .res.pitch2D.devPtr = (CUdeviceptr)src_dptr, }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/vf_scale_cuda: Add support for YUV444P16
ffmpeg | branch: master | Philip Langdale | Mon May 13 20:00:12 2019 -0700| [bfbde996cfa51a28359841369144ad075d603086] | committer: Philip Langdale avfilter/vf_scale_cuda: Add support for YUV444P16 This format is interesting because it's what you get for decoded 10/12bit HEVC 4:4:4. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfbde996cfa51a28359841369144ad075d603086 --- libavfilter/version.h | 2 +- libavfilter/vf_scale_cuda.c | 17 - 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/libavfilter/version.h b/libavfilter/version.h index 293af36f62..ed3fc7f108 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -31,7 +31,7 @@ #define LIBAVFILTER_VERSION_MAJOR 7 #define LIBAVFILTER_VERSION_MINOR 53 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ LIBAVFILTER_VERSION_MINOR, \ diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index ecfd6a1c92..a833dcd1a4 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -43,7 +43,8 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV444P, AV_PIX_FMT_P010, -AV_PIX_FMT_P016 +AV_PIX_FMT_P016, +AV_PIX_FMT_YUV444P16, }; #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) @@ -411,6 +412,20 @@ static int scalecuda_resize(AVFilterContext *ctx, out->data[0]+out->linesize[0]*out->height*2, out->width, out->height, out->linesize[0], 1); break; +case AV_PIX_FMT_YUV444P16: +call_resize_kernel(ctx, s->cu_func_ushort, 1, + in->data[0], in->width, in->height, in->linesize[0] / 2, + out->data[0], out->width, out->height, out->linesize[0] / 2, + 2); +call_resize_kernel(ctx, s->cu_func_ushort, 1, + in->data[1], in->width, in->height, in->linesize[1] / 2, + out->data[1], out->width, out->height, out->linesize[1] / 2, + 2); +call_resize_kernel(ctx, s->cu_func_ushort, 1, + in->data[2], in->width, in->height, in->linesize[2] / 2, + 
out->data[2], out->width, out->height, out->linesize[2] / 2, + 2); +break; case AV_PIX_FMT_NV12: call_resize_kernel(ctx, s->cu_func_uchar, 1, in->data[0], in->width, in->height, in->linesize[0], ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] swscale: Add support for NV24 and NV42
ffmpeg | branch: master | Philip Langdale | Thu May 9 21:02:09 2019 -0700| [cd483180356c8f206f32393acc52a85c5b76758b] | committer: Philip Langdale swscale: Add support for NV24 and NV42 The implementation is pretty straight-forward. Most of the existing NV12 codepaths work regardless of subsampling and are re-used as is. Where necessary I wrote the slightly different NV24 versions. Finally, the one thing that confused me for a long time was the asm specific x86 path that did an explicit exclusion check for NV12. I replaced that with a semi-planar check and also updated the equivalent PPC code, which Lauri kindly checked. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cd483180356c8f206f32393acc52a85c5b76758b --- libswscale/input.c | 2 ++ libswscale/output.c | 6 ++-- libswscale/ppc/swscale_altivec.c | 3 +- libswscale/ppc/swscale_vsx.c | 3 +- libswscale/swscale_unscaled.c| 51 libswscale/utils.c | 2 ++ libswscale/version.h | 2 +- libswscale/x86/swscale_template.c| 4 +-- tests/ref/fate/filter-pixfmts-copy | 2 ++ tests/ref/fate/filter-pixfmts-crop | 2 ++ tests/ref/fate/filter-pixfmts-field | 2 ++ tests/ref/fate/filter-pixfmts-fieldorder | 2 ++ tests/ref/fate/filter-pixfmts-hflip | 2 ++ tests/ref/fate/filter-pixfmts-il | 2 ++ tests/ref/fate/filter-pixfmts-null | 2 ++ tests/ref/fate/filter-pixfmts-pad| 2 ++ tests/ref/fate/filter-pixfmts-scale | 2 ++ tests/ref/fate/filter-pixfmts-transpose | 2 ++ tests/ref/fate/filter-pixfmts-vflip | 2 ++ tests/ref/fate/sws-pixdesc-query | 6 20 files changed, 92 insertions(+), 9 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index c2dc356b5d..064f8da314 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1020,9 +1020,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->chrToYV12 = uyvyToUV_c; break; case AV_PIX_FMT_NV12: +case AV_PIX_FMT_NV24: c->chrToYV12 = nv12ToUV_c; break; case AV_PIX_FMT_NV21: +case AV_PIX_FMT_NV42: c->chrToYV12 = nv21ToUV_c; break; case AV_PIX_FMT_RGB8: diff --git 
a/libswscale/output.c b/libswscale/output.c index d3401f0cd1..26b0ff3d48 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -410,7 +410,8 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS const uint8_t *chrDither = c->chrDither8; int i; -if (dstFormat == AV_PIX_FMT_NV12) +if (dstFormat == AV_PIX_FMT_NV12 || +dstFormat == AV_PIX_FMT_NV24) for (i=0; isrcBpc == 8 && c->dstBpc <= 14) { c->hyScale = c->hcScale = hScale_real_altivec; } -if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && -dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && +if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE && !c->needAlpha) { c->yuv2planeX = yuv2planeX_altivec; diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c index a617f76741..75dee5ea58 100644 --- a/libswscale/ppc/swscale_vsx.c +++ b/libswscale/ppc/swscale_vsx.c @@ -2096,8 +2096,7 @@ av_cold void ff_sws_init_swscale_vsx(SwsContext *c) : hScale16To15_vsx; } } -if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && -dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && +if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat) && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE && !c->needAlpha) { c->yuv2planeX = yuv2planeX_vsx; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index be04a236d8..e0b9e99373 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -180,6 +180,47 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } +static int planarToNv24Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t *dstParam[], + int dstStride[]) +{ +uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY; + +copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, + dstParam[0], 
dstStride[0]); + +if (c->dstFormat == AV_PIX_FMT_NV24) +interleaveBytes(src[1], src[2], dst, c->chrSrcW, srcSlice
[FFmpeg-cvslog] swscale: Add test for isSemiPlanarYUV to pixdesc_query
ffmpeg | branch: master | Philip Langdale | Sat May 11 10:12:47 2019 -0700| [4fa4f1d7a9499032b8fcef8ed075294baf868be7] | committer: Philip Langdale swscale: Add test for isSemiPlanarYUV to pixdesc_query Lauri had asked me what the semi planar formats were and that reminded me that we could add it to pixdesc_query so we know exactly what the list is. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4fa4f1d7a9499032b8fcef8ed075294baf868be7 --- libswscale/tests/pixdesc_query.c | 1 + tests/ref/fate/sws-pixdesc-query | 13 + 2 files changed, 14 insertions(+) diff --git a/libswscale/tests/pixdesc_query.c b/libswscale/tests/pixdesc_query.c index a5585c4314..f6dd8bae68 100644 --- a/libswscale/tests/pixdesc_query.c +++ b/libswscale/tests/pixdesc_query.c @@ -32,6 +32,7 @@ static const struct { {"isBE",isBE}, {"isYUV", isYUV}, {"isPlanarYUV", isPlanarYUV}, +{"isSemiPlanarYUV", isSemiPlanarYUV}, {"isRGB", isRGB}, {"Gray",isGray}, {"RGBinInt",isRGBinInt}, diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query index bc8147e3c7..e23492293e 100644 --- a/tests/ref/fate/sws-pixdesc-query +++ b/tests/ref/fate/sws-pixdesc-query @@ -347,6 +347,19 @@ isPlanarYUV: yuvj440p yuvj444p +isSemiPlanarYUV: + nv12 + nv16 + nv20be + nv20le + nv21 + nv24 + nv42 + p010be + p010le + p016be + p016le + isRGB: 0bgr 0rgb ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil: Add NV24 and NV42 pixel formats
ffmpeg | branch: master | Philip Langdale | Mon May 6 20:39:39 2019 -0700| [5de4f1d871d60886b9630531fa8c34cad13cc9dd] | committer: Philip Langdale avutil: Add NV24 and NV42 pixel formats These are the 4:4:4 variants of the semi-planar NV12/NV21 formats. These formats are not used much, so we've never had a reason to add them until now. VDPAU recently added support HEVC 4:4:4 content and when you use the OpenGL interop, the returned surfaces are in NV24 format, so we need the pixel format for media players, even if there's no direct use within ffmpeg. Separately, there are apparently webcams that use NV24, but I've never seen one. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5de4f1d871d60886b9630531fa8c34cad13cc9dd --- libavutil/pixdesc.c | 24 libavutil/pixfmt.h| 3 +++ libavutil/tests/pixfmt_best.c | 1 + libavutil/version.h | 4 ++-- tests/ref/fate/pixfmt_best| 2 +- 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index fe38344d73..b97b0665b0 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2320,6 +2320,30 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA, }, +[AV_PIX_FMT_NV24] = { +.name = "nv24", +.nb_components = 3, +.log2_chroma_w = 0, +.log2_chroma_h = 0, +.comp = { +{ 0, 1, 0, 0, 8, 0, 7, 1 },/* Y */ +{ 1, 2, 0, 0, 8, 1, 7, 1 },/* U */ +{ 1, 2, 1, 0, 8, 1, 7, 2 },/* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR, +}, +[AV_PIX_FMT_NV42] = { +.name = "nv42", +.nb_components = 3, +.log2_chroma_w = 0, +.log2_chroma_h = 0, +.comp = { +{ 0, 1, 0, 0, 8, 0, 7, 1 },/* Y */ +{ 1, 2, 1, 0, 8, 1, 7, 2 },/* U */ +{ 1, 2, 0, 0, 8, 1, 7, 1 },/* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR, +}, }; #if FF_API_PLUS1_MINUS1 FF_ENABLE_DEPRECATION_WARNINGS diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 24d1b7e415..8b54c9415b 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -345,6 +345,9 @@ enum 
AVPixelFormat { AV_PIX_FMT_YUVA444P12BE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), 12b alpha, big-endian AV_PIX_FMT_YUVA444P12LE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), 12b alpha, little-endian +AV_PIX_FMT_NV24, ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V) +AV_PIX_FMT_NV42, ///< as above, but U and V bytes are swapped + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; diff --git a/libavutil/tests/pixfmt_best.c b/libavutil/tests/pixfmt_best.c index e98fcc19a5..53f7264207 100644 --- a/libavutil/tests/pixfmt_best.c +++ b/libavutil/tests/pixfmt_best.c @@ -76,6 +76,7 @@ int main(void) TEST(AV_PIX_FMT_P010, AV_PIX_FMT_YUV420P10); TEST(AV_PIX_FMT_P016, AV_PIX_FMT_YUV420P16); TEST(AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P); +TEST(AV_PIX_FMT_NV24, AV_PIX_FMT_YUV444P); TEST(AV_PIX_FMT_YUYV422, AV_PIX_FMT_YUV422P); TEST(AV_PIX_FMT_UYVY422, AV_PIX_FMT_YUV422P); TEST(AV_PIX_FMT_BGR565,AV_PIX_FMT_RGB565); diff --git a/libavutil/version.h b/libavutil/version.h index c0968de621..12b4f9fc3a 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,8 +79,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 56 -#define LIBAVUTIL_VERSION_MINOR 26 -#define LIBAVUTIL_VERSION_MICRO 101 +#define LIBAVUTIL_VERSION_MINOR 27 +#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/tests/ref/fate/pixfmt_best b/tests/ref/fate/pixfmt_best index 699e2e4213..5f51e2d845 100644 --- a/tests/ref/fate/pixfmt_best +++ b/tests/ref/fate/pixfmt_best @@ -1 +1 @@ -72 tests passed, 0 tests failed. +73 tests passed, 0 tests failed. 
___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/hwcontext_cuda: Remove unnecessary stream synchronisation
ffmpeg | branch: master | Philip Langdale | Sat Mar 30 10:56:49 2019 -0700| [52d8f35b14bc379572e74d042d3466b8d3b6e7cf] | committer: Philip Langdale avutil/hcontext_cuda: Remove unnecessary stream synchronisation Similarly to the previous changes, we don't need to synchronise after a memcpy to device memory. On the other hand, we need to keep synchronising after a copy to host memory, otherwise there's no guarantee that subsequent host reads will return valid data. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=52d8f35b14bc379572e74d042d3466b8d3b6e7cf --- libavutil/hwcontext_cuda.c | 4 1 file changed, 4 deletions(-) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 540a7610ef..cca39e9fc7 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -268,10 +268,6 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, goto exit; } -ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); -if (ret < 0) -goto exit; - exit: CHECK_CU(cu->cuCtxPopCurrent()); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/cuviddec: Remove unnecessary stream synchronisation
ffmpeg | branch: master | Philip Langdale | Sat Mar 30 10:51:07 2019 -0700| [5d90d1e36ef3abfa2843e54389d0ffd0fa7ca405] | committer: Philip Langdale avcodec/cuviddec: Remove unnecessary stream synchronisation We're also doing a sync here after copying the frame to be passed on down the pipleine. And it is also unnecessary. I was able to demonstrate a 33% speedup removing the sync from an example transcode pipeline. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5d90d1e36ef3abfa2843e54389d0ffd0fa7ca405 --- libavcodec/cuviddec.c | 4 1 file changed, 4 deletions(-) diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 291bb93dbc..2aecb45768 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -553,10 +553,6 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) offset += height; } - -ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream)); -if (ret < 0) -goto error; } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || avctx->pix_fmt == AV_PIX_FMT_P010 || avctx->pix_fmt == AV_PIX_FMT_P016 || ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/vf_yadif_cuda: Remove unnecessary stream synchronisation
ffmpeg | branch: master | Philip Langdale | Sat Mar 30 08:50:44 2019 -0700| [c0b6e4cb6d6d41dbf2684891ed9dd43d9ddfb804] | committer: Philip Langdale avfilter/vf_yadif_cuda: Remove unnecessary stream synchronisation I put this call in by habit, rather than because there was any actual need. The filter is simply processing frames one after the other and has no need to synchronise. malakudi on the devtalk forums noticed a slowdown when using nvenc with temporal/spatial aq and that the slowdown went away if the sync call was removed. I also verified that in the basic encoding case there's an observable speedup. I also verified that we aren't doing unnecessary sync calls in any other filter. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c0b6e4cb6d6d41dbf2684891ed9dd43d9ddfb804 --- libavfilter/vf_yadif_cuda.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index 141dcb17f7..c9eb1a229d 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -180,8 +180,6 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, parity, tff); } -CHECK_CU(cu->cuStreamSynchronize(s->stream)); - exit: CHECK_CU(cu->cuCtxPopCurrent()); return; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/vf_thumbnail_cuda: Switch to using ffnvcodec
ffmpeg | branch: master | Philip Langdale | Wed Feb 20 19:57:52 2019 -0800| [b4c9c09915de8ffaa4a2d2606e85729afa6c4e8e] | committer: Timo Rothenpieler avfilter/vf_thumbnail_cuda: Switch to using ffnvcodec This change switches the vf_thumbnail_cuda filter from using the full cuda sdk to using the ffnvcodec headers and loader. Most of the change is a direct mapping, but I also switched from using texture references to using texture objects. This is supposed to be the preferred way of using textures, and the texture object API is the one I added to ffnvcodec. Signed-off-by: Philip Langdale Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b4c9c09915de8ffaa4a2d2606e85729afa6c4e8e --- configure| 2 +- libavfilter/vf_thumbnail_cuda.c | 147 +-- libavfilter/vf_thumbnail_cuda.cu | 25 --- 3 files changed, 93 insertions(+), 81 deletions(-) diff --git a/configure b/configure index 079e95269e..57bb05f228 100755 --- a/configure +++ b/configure @@ -2976,7 +2976,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait" hwupload_cuda_filter_deps="ffnvcodec" scale_npp_filter_deps="ffnvcodec libnpp" scale_cuda_filter_deps="ffnvcodec cuda_nvcc" -thumbnail_cuda_filter_deps="cuda_sdk" +thumbnail_cuda_filter_deps="ffnvcodec cuda_nvcc" transpose_npp_filter_deps="ffnvcodec libnpp" amf_deps_any="libdl LoadLibrary" diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c index 22691e156f..0c06815643 100644 --- a/libavfilter/vf_thumbnail_cuda.c +++ b/libavfilter/vf_thumbnail_cuda.c @@ -20,10 +20,8 @@ * DEALINGS IN THE SOFTWARE. 
*/ -#include - #include "libavutil/hwcontext.h" -#include "libavutil/hwcontext_cuda.h" +#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/cuda_check.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -31,7 +29,7 @@ #include "avfilter.h" #include "internal.h" -#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) #define HIST_SIZE (3*256) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) @@ -60,6 +58,7 @@ typedef struct ThumbnailCudaContext { AVRational tb; ///< copy of the input timebase to ease access AVBufferRef *hw_frames_ctx; +AVCUDADeviceContext *hwctx; CUmodulecu_module; @@ -67,12 +66,10 @@ typedef struct ThumbnailCudaContext { CUfunction cu_func_uchar2; CUfunction cu_func_ushort; CUfunction cu_func_ushort2; -CUtexrefcu_tex_uchar; -CUtexrefcu_tex_uchar2; -CUtexrefcu_tex_ushort; -CUtexrefcu_tex_ushort2; +CUstreamcu_stream; CUdeviceptr data; + } ThumbnailCudaContext; #define OFFSET(x) offsetof(ThumbnailCudaContext, x) @@ -157,29 +154,44 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) return picref; } -static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels, +static int thumbnail_kernel(AVFilterContext *ctx, CUfunction func, int channels, int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) { -CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; -void *args[] = { , _width, _height }; -CUDA_ARRAY_DESCRIPTOR desc; - -desc.Width = src_width; -desc.Height = src_height; -desc.NumChannels = channels; -if (pixel_size == 1) { -desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; -} -else { -desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; -} +int ret; +ThumbnailCudaContext *s = ctx->priv; +CudaFunctions *cu = s->hwctx->internal->cuda_dl; +CUtexObject tex = 0; +void *args[] = { , , _width, _height }; -CHECK_CU(cuTexRefSetAddress2D_v3(tex, , src_devptr, src_pitch)); -CHECK_CU(cuLaunchKernel(func, 
-DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, -BLOCKX, BLOCKY, 1, 0, 0, args, NULL)); +CUDA_TEXTURE_DESC tex_desc = { +.filterMode = CU_TR_FILTER_MODE_LINEAR, +.flags = CU_TRSF_READ_AS_INTEGER, +}; -return 0; +CUDA_RESOURCE_DESC res_desc = { +.resType = CU_RESOURCE_TYPE_PITCH2D, +.res.pitch2D.format = pixel_size == 1 ? + CU_AD_FORMAT_UNSIGNED_INT8 : + CU_AD_FORMAT_UNSIGNED_INT16, +.res.pitch2D.numChannels = channels, +.res.pitch2D.width = src_width, +.res.pitch2D.height = src_height, +.res.pitch2D.pitchInBytes = src_pitch, +.res.pitch2D.devPtr = (CUdeviceptr)src_dptr, +}; + +ret = CHECK_CU(cu->cuTexObjectCreate(, _desc, _desc, NULL)); +if (ret < 0) +goto exit; + +r
[FFmpeg-cvslog] avfilter/vf_scale_cuda: Switch to using ffnvcodec
ffmpeg | branch: master | Philip Langdale | Wed Feb 20 19:57:51 2019 -0800| [2544c7ea67ca9521c5de36396bc9ac7058223742] | committer: Timo Rothenpieler avfilter/vf_scale_cuda: Switch to using ffnvcodec This change switches the vf_scale_cuda filter from using the full cuda sdk to using the ffnvcodec headers and loader. Most of the change is a direct mapping, but I also switched from using texture references to using texture objects. This is supposed to be the preferred way of using textures, and the texture object API is the one I added to ffnvcodec. Signed-off-by: Philip Langdale Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2544c7ea67ca9521c5de36396bc9ac7058223742 --- configure| 2 +- libavfilter/vf_scale_cuda.c | 168 +++ libavfilter/vf_scale_cuda.cu | 73 ++- 3 files changed, 128 insertions(+), 115 deletions(-) diff --git a/configure b/configure index b48c0df6eb..079e95269e 100755 --- a/configure +++ b/configure @@ -2975,7 +2975,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait" hwupload_cuda_filter_deps="ffnvcodec" scale_npp_filter_deps="ffnvcodec libnpp" -scale_cuda_filter_deps="cuda_sdk" +scale_cuda_filter_deps="ffnvcodec cuda_nvcc" thumbnail_cuda_filter_deps="cuda_sdk" transpose_npp_filter_deps="ffnvcodec libnpp" diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 53b7aa9531..c97a802ddc 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -20,14 +20,13 @@ * DEALINGS IN THE SOFTWARE. 
*/ -#include #include #include #include "libavutil/avstring.h" #include "libavutil/common.h" #include "libavutil/hwcontext.h" -#include "libavutil/hwcontext_cuda.h" +#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" @@ -53,10 +52,13 @@ static const enum AVPixelFormat supported_formats[] = { #define BLOCKX 32 #define BLOCKY 16 -#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) typedef struct CUDAScaleContext { const AVClass *class; + +AVCUDADeviceContext *hwctx; + enum AVPixelFormat in_fmt; enum AVPixelFormat out_fmt; @@ -80,7 +82,6 @@ typedef struct CUDAScaleContext { char *h_expr; ///< height expression string CUcontext cu_ctx; -CUevent cu_event; CUmodulecu_module; CUfunction cu_func_uchar; CUfunction cu_func_uchar2; @@ -88,12 +89,7 @@ typedef struct CUDAScaleContext { CUfunction cu_func_ushort; CUfunction cu_func_ushort2; CUfunction cu_func_ushort4; -CUtexrefcu_tex_uchar; -CUtexrefcu_tex_uchar2; -CUtexrefcu_tex_uchar4; -CUtexrefcu_tex_ushort; -CUtexrefcu_tex_ushort2; -CUtexrefcu_tex_ushort4; +CUstreamcu_stream; CUdeviceptr srcBuffer; CUdeviceptr dstBuffer; @@ -258,48 +254,49 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; +CudaFunctions *cu = device_hwctx->internal->cuda_dl; int w, h; int ret; extern char vf_scale_cuda_ptx[]; -ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); +s->hwctx = device_hwctx; +s->cu_stream = s->hwctx->stream; + +ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); +if (ret < 0) +goto fail; + +ret = CHECK_CU(cu->cuModuleLoadData(>cu_module, vf_scale_cuda_ptx)); +if (ret < 0) +goto fail; + +CHECK_CU(cu->cuModuleGetFunction(>cu_func_uchar, s->cu_module, 
"Subsample_Bilinear_uchar")); +if (ret < 0) +goto fail; + +CHECK_CU(cu->cuModuleGetFunction(>cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); +if (ret < 0) +goto fail; + +CHECK_CU(cu->cuModuleGetFunction(>cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); +if (ret < 0) +goto fail; + +CHECK_CU(cu->cuModuleGetFunction(>cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); if (ret < 0) goto fail; -ret = CHECK_CU(cuModuleLoadData(>cu_module, vf_scale_cuda_ptx)); +CHECK_CU(cu->cuModuleGetFunction(>cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); +if (ret < 0) +goto fail; + +CHECK_CU(cu->cuModuleGetFunction(>cu_func_ushort4, s->cu_module, "Subsample_Bilin
[FFmpeg-cvslog] configure: deprecate cuda_sdk dependency option
ffmpeg | branch: master | Philip Langdale | Wed Feb 20 19:57:53 2019 -0800| [114ead9735f226e5824a15b94b32344436c96a71] | committer: Timo Rothenpieler configure: deprecate cuda_sdk dependency option With all of our existing users of cuda_sdk switched over to ffnvcodec, we could remove cuda_sdk completely and say that we should no longer add code that requires the full sdk, and rather insist that such code only use ffnvcodec. As discussed previously, the use of nvcc from the sdk is still supported with a distinct option. Signed-off-by: Philip Langdale Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=114ead9735f226e5824a15b94b32344436c96a71 --- configure | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 57bb05f228..6c6c1c44bb 100755 --- a/configure +++ b/configure @@ -322,7 +322,6 @@ External library support: --disable-amfdisable AMF video encoding code [autodetect] --disable-audiotoolbox disable Apple AudioToolbox code [autodetect] --enable-cuda-nvcc enable Nvidia CUDA compiler [no] - --enable-cuda-sdkenable CUDA features that require the CUDA SDK [no] --disable-cuvid disable Nvidia CUVID support [autodetect] --disable-d3d11vadisable Microsoft Direct3D 11 video acceleration code [autodetect] --disable-dxva2 disable Microsoft DirectX 9 video acceleration code [autodetect] @@ -6036,6 +6035,11 @@ check_type "va/va.h va/va_enc_vp9.h" "VAEncPictureParameterBufferVP9" check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC" +if enabled cuda_sdk; then +warn "Option --enable-cuda-sdk is deprecated. Use --enable-cuda-nvcc instead." +enable cuda_nvcc +fi + if ! 
disabled ffnvcodec; then check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.2" \ "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" "" || \ @@ -6111,7 +6115,6 @@ done # these are off by default, so fail if requested and not available enabled cuda_nvcc && { check_nvcc || die "ERROR: failed checking for nvcc."; } -enabled cuda_sdk && require cuda_sdk cuda.h cuCtxCreate -lcuda enabled chromaprint && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint enabled decklink && { require_headers DeckLinkAPI.h && { test_cpp_condition DeckLinkAPIVersion.h "BLACKMAGIC_DECKLINK_API_VERSION >= 0x0a090500" || die "ERROR: Decklink API version must be >= 10.9.5."; } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avfilter/vf_yadif_cuda: Switch to using ffnvcodec
ffmpeg | branch: master | Philip Langdale | Wed Feb 20 19:57:50 2019 -0800| [7debf4277c29dcd855130f92ad54da0c4a535221] | committer: Timo Rothenpieler avfilter/vf_yadif_cuda: Switch to using ffnvcodec This change switches the vf_thumbnail_cuda filter from using the full cuda sdk to using the ffnvcodec headers and loader. Signed-off-by: Philip Langdale Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7debf4277c29dcd855130f92ad54da0c4a535221 --- configure | 2 +- libavfilter/vf_yadif_cuda.c | 58 - 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/configure b/configure index cf1b96097f..b48c0df6eb 100755 --- a/configure +++ b/configure @@ -3535,7 +3535,7 @@ zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" -yadif_cuda_filter_deps="cuda_sdk" +yadif_cuda_filter_deps="ffnvcodec cuda_nvcc" # examples avio_dir_cmd_deps="avformat avutil" diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index 85e1aac5eb..141dcb17f7 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -18,9 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include "libavutil/avassert.h" -#include "libavutil/hwcontext_cuda.h" +#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/cuda_check.h" #include "internal.h" #include "yadif.h" @@ -49,7 +48,7 @@ typedef struct DeintCUDAContext { #define BLOCKX 32 #define BLOCKY 16 -#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, @@ -64,6 +63,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, int parity, int tff) { DeintCUDAContext *s = ctx->priv; +CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUtexObject tex_prev = 0, 
tex_cur = 0, tex_next = 0; int ret; int skip_spatial_check = s->yadif.mode&2; @@ -88,32 +88,32 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, }; res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; -ret = CHECK_CU(cuTexObjectCreate(_prev, _desc, _desc, NULL)); +ret = CHECK_CU(cu->cuTexObjectCreate(_prev, _desc, _desc, NULL)); if (ret < 0) goto exit; res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; -ret = CHECK_CU(cuTexObjectCreate(_cur, _desc, _desc, NULL)); +ret = CHECK_CU(cu->cuTexObjectCreate(_cur, _desc, _desc, NULL)); if (ret < 0) goto exit; res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; -ret = CHECK_CU(cuTexObjectCreate(_next, _desc, _desc, NULL)); +ret = CHECK_CU(cu->cuTexObjectCreate(_next, _desc, _desc, NULL)); if (ret < 0) goto exit; -ret = CHECK_CU(cuLaunchKernel(func, - DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, - BLOCKX, BLOCKY, 1, - 0, s->stream, args, NULL)); +ret = CHECK_CU(cu->cuLaunchKernel(func, + DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, + 0, s->stream, args, NULL)); exit: if (tex_prev) -CHECK_CU(cuTexObjectDestroy(tex_prev)); +CHECK_CU(cu->cuTexObjectDestroy(tex_prev)); if (tex_cur) -CHECK_CU(cuTexObjectDestroy(tex_cur)); +CHECK_CU(cu->cuTexObjectDestroy(tex_cur)); if (tex_next) -CHECK_CU(cuTexObjectDestroy(tex_next)); +CHECK_CU(cu->cuTexObjectDestroy(tex_next)); return ret; } @@ -123,10 +123,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, { DeintCUDAContext *s = ctx->priv; YADIFContext *y = >yadif; +CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUcontext dummy; int i, ret; -ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); +ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); if (ret < 0) return; @@ -179,10 +180,10 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, parity, tff); } -CHECK_CU(cuStreamSynchronize(s->stream)); +CHECK_CU(cu->cuStreamSynchronize(s->stream)); exit: -CHECK_CU(cuCtxPopCurrent()); +CHECK_CU(cu->cuCtxPopCurrent()); return; } @@ -192,10 
+193,11 @@ static av_cold void deint_cuda_uninit(AVFilterContext *ctx)
[FFmpeg-cvslog] configure: Add an explicit check and option for nvcc
ffmpeg | branch: master | Philip Langdale | Wed Feb 20 19:57:49 2019 -0800| [5f47bfd50ac46fb18b23a212a53bea0feacc1bb0] | committer: Timo Rothenpieler configure: Add an explicit check and option for nvcc The use of nvcc to compile cuda kernels is distinct from the use of cuda sdk libraries and linking against those libraries. We have previously not bothered to distinguish these two cases because all the filters that used cuda kernels also used the sdk. In the following changes, I'm going to remove the sdk dependency from those filters, but we need a way to ensure that nvcc is present and functioning, and also a way to explicitly disable its use so that the filters are not built. Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5f47bfd50ac46fb18b23a212a53bea0feacc1bb0 --- configure | 24 1 file changed, 24 insertions(+) diff --git a/configure b/configure index bf40c1dcb9..cf1b96097f 100755 --- a/configure +++ b/configure @@ -321,6 +321,7 @@ External library support: The following libraries provide various hardware acceleration features: --disable-amfdisable AMF video encoding code [autodetect] --disable-audiotoolbox disable Apple AudioToolbox code [autodetect] + --enable-cuda-nvcc enable Nvidia CUDA compiler [no] --enable-cuda-sdkenable CUDA features that require the CUDA SDK [no] --disable-cuvid disable Nvidia CUVID support [autodetect] --disable-d3d11vadisable Microsoft Direct3D 11 video acceleration code [autodetect] @@ -1001,6 +1002,10 @@ hostcc_o(){ eval printf '%s\\n' $HOSTCC_O } +nvcc_o(){ +eval printf '%s\\n' $NVCC_O +} + test_cc(){ log test_cc "$@" cat > $TMPC @@ -1022,6 +1027,22 @@ test_objcc(){ test_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM } +test_nvcc(){ +log test_nvcc "$@" +cat > $TMPCU +log_file $TMPCU +test_cmd $nvcc -ptx $NVCCFLAGS "$@" $NVCC_C $(nvcc_o $TMPO) $TMPCU +} + +check_nvcc() { +log check_nvcc "$@" +test_nvcc < $TMPC @@ -1806,6 +1827,7 @@ 
EXTRALIBS_LIST=" " HWACCEL_LIBRARY_NONFREE_LIST=" +cuda_nvcc cuda_sdk libnpp " @@ -4238,6 +4260,7 @@ tmpfile TMPCPP .cpp tmpfile TMPE $EXESUF tmpfile TMPH .h tmpfile TMPM .m +tmpfile TMPCU .cu tmpfile TMPO .o tmpfile TMPS .S tmpfile TMPSH .sh @@ -6087,6 +6110,7 @@ for func in $COMPLEX_FUNCS; do done # these are off by default, so fail if requested and not available +enabled cuda_nvcc && { check_nvcc || die "ERROR: failed checking for nvcc."; } enabled cuda_sdk && require cuda_sdk cuda.h cuCtxCreate -lcuda enabled chromaprint && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint enabled decklink && { require_headers DeckLinkAPI.h && ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/cuda_check: Fix non-dynamic-loader implementation
ffmpeg | branch: master | Philip Langdale | Tue Feb 19 19:12:19 2019 -0800| [96d79ff5b5c8b08eaead6fd2c77a265fd2c40b22] | committer: Philip Langdale avutil/cuda_check: Fix non-dynamic-loader implementation The function typedefs we were using are only present when using the dynamic loader, which means compilation breaks for code directly using the cuda SDK. To fix this, let's just duplicate the function typedefs locally. These are not going to change. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=96d79ff5b5c8b08eaead6fd2c77a265fd2c40b22 --- libavutil/cuda_check.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libavutil/cuda_check.h b/libavutil/cuda_check.h index ec1705b000..d02ea7eec4 100644 --- a/libavutil/cuda_check.h +++ b/libavutil/cuda_check.h @@ -20,6 +20,9 @@ #ifndef AVUTIL_CUDA_CHECK_H #define AVUTIL_CUDA_CHECK_H +typedef CUresult CUDAAPI cuda_check_GetErrorName(CUresult error, const char** pstr); +typedef CUresult CUDAAPI cuda_check_GetErrorString(CUresult error, const char** pstr); + /** * Wrap a CUDA function call and print error information if it fails. */ @@ -35,8 +38,8 @@ static inline int ff_cuda_check(void *avctx, if (err == CUDA_SUCCESS) return 0; -((tcuGetErrorName *)cuGetErrorName_fn)(err, _name); -((tcuGetErrorString *)cuGetErrorString_fn)(err, _string); +((cuda_check_GetErrorName *)cuGetErrorName_fn)(err, _name); +((cuda_check_GetErrorString *)cuGetErrorString_fn)(err, _string); av_log(avctx, AV_LOG_ERROR, "%s failed", func); if (err_name && err_string) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/version: Bump micro-version for nvdec/cuviddec changes
ffmpeg | branch: master | Philip Langdale | Sat Feb 16 10:40:32 2019 -0800| [d6fc5dc24aa09e026c6271a7565e63798dfe46f3] | committer: Philip Langdale avcodec/version: Bump micro-version for nvdec/cuviddec changes I forgot to add the version bump and changelog within the changes. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d6fc5dc24aa09e026c6271a7565e63798dfe46f3 --- Changelog| 1 + libavcodec/version.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index d5515c4911..4d80e5b54f 100644 --- a/Changelog +++ b/Changelog @@ -18,6 +18,7 @@ version : - hcom demuxer and decoder - ARBC decoder - libaribb24 based ARIB STD-B24 caption support (profiles A and C) +- Support decoding of HEVC 4:4:4 content in nvdec and cuviddec version 4.1: diff --git a/libavcodec/version.h b/libavcodec/version.h index f2f188ea7c..7c3897e2d4 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 47 -#define LIBAVCODEC_VERSION_MICRO 101 +#define LIBAVCODEC_VERSION_MICRO 102 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/cuviddec: Add support for decoding HEVC 4:4:4 content
ffmpeg | branch: master | Philip Langdale | Sun Oct 7 09:10:00 2018 -0700| [317b7b06fd97cd39feac7df57db22a30550351ff] | committer: Philip Langdale avcodec/cuviddec: Add support for decoding HEVC 4:4:4 content This is the equivalent change for cuviddec after the previous change for nvdec. I made similar changes to the copying routines to handle pixel formats in a more generic way. Note that unlike with nvdec, there is no confusion about the ability of a codec to output 444 formats. This is because the cuvid parser is used, meaning that 444 JPEG content is still indicated as using a 420 output format. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=317b7b06fd97cd39feac7df57db22a30550351ff --- libavcodec/cuviddec.c | 66 +++ 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 03589367ce..291bb93dbc 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -34,8 +34,14 @@ #include "avcodec.h" #include "decode.h" #include "hwaccel.h" +#include "nvdec.h" #include "internal.h" +#if !NVDECAPI_CHECK_VERSION(9, 0) +#define cudaVideoSurfaceFormat_YUV444 2 +#define cudaVideoSurfaceFormat_YUV444_16Bit 3 +#endif + typedef struct CuvidContext { AVClass *avclass; @@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form CUVIDDECODECAPS *caps = NULL; CUVIDDECODECREATEINFO cuinfo; int surface_fmt; +int chroma_444; int old_width = avctx->width; int old_height = avctx->height; @@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.target_rect.right = cuinfo.ulTargetWidth; cuinfo.target_rect.bottom = cuinfo.ulTargetHeight; +chroma_444 = format->chroma_format == cudaVideoChromaFormat_444; + switch (format->bit_depth_luma_minus8) { case 0: // 8-bit -pix_fmts[1] = AV_PIX_FMT_NV12; +pix_fmts[1] = chroma_444 ? 
AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12; caps = >caps8; break; case 2: // 10-bit -pix_fmts[1] = AV_PIX_FMT_P010; +pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010; caps = >caps10; break; case 4: // 12-bit -pix_fmts[1] = AV_PIX_FMT_P016; +pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016; caps = >caps12; break; default: @@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return 0; } -if (format->chroma_format != cudaVideoChromaFormat_420) { -av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n"); -ctx->internal_error = AVERROR(EINVAL); -return 0; -} - ctx->chroma_format = format->chroma_format; cuinfo.CodecType = ctx->codec_type = format->codec; @@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form case AV_PIX_FMT_P016: cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; break; +case AV_PIX_FMT_YUV444P: +cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444; +break; +case AV_PIX_FMT_YUV444P16: +cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit; +break; default: -av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n"); +av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); ctx->internal_error = AVERROR(EINVAL); return 0; } @@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) return ret; if (av_fifo_size(ctx->frame_queue)) { +const AVPixFmtDescriptor *pixdesc; CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; @@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error; } -for (i = 0; i < 2; i++) { +pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + +for (i = 0; i < pixdesc->nb_components; i++) { +int height = avctx->height >> (i ? 
pixdesc->log2_chroma_h : 0); CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, @@ -530,22 +544,25 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[
[FFmpeg-cvslog] avcodec/nvdec: Explicitly mark codecs that support 444 output formats
ffmpeg | branch: master | Philip Langdale | Sat Oct 6 20:20:58 2018 -0700| [83c7ac2e47efd96927127c1c385cdbb5fb53cb02] | committer: Philip Langdale avcodec/nvdec: Explicitly mark codecs that support 444 output formats With the introduction of HEVC 444 support, we technically have two codecs that can handle 444 - HEVC and MJPEG. In the case of MJPEG, it can decode, but can only output one of the semi-planar formats. That means we need additional logic to decide whether to use a 444 output format or not. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=83c7ac2e47efd96927127c1c385cdbb5fb53cb02 --- libavcodec/nvdec.c| 7 --- libavcodec/nvdec.h| 5 - libavcodec/nvdec_h264.c | 2 +- libavcodec/nvdec_hevc.c | 10 -- libavcodec/nvdec_mjpeg.c | 2 +- libavcodec/nvdec_mpeg12.c | 2 +- libavcodec/nvdec_mpeg4.c | 2 +- libavcodec/nvdec_vc1.c| 2 +- libavcodec/nvdec_vp8.c| 2 +- libavcodec/nvdec_vp9.c| 2 +- 10 files changed, 23 insertions(+), 13 deletions(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 72201a1123..b60da24301 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -298,7 +298,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n"); return AVERROR(ENOSYS); } -chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; +chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444; if (!avctx->hw_frames_ctx) { ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA); @@ -587,7 +587,8 @@ static AVBufferRef *nvdec_alloc_dummy(int size) int ff_nvdec_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx, - int dpb_size) + int dpb_size, + int supports_444) { AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; const AVPixFmtDescriptor *sw_desc; @@ -608,7 +609,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n"); return AVERROR(EINVAL); } -chroma_444 = cuvid_chroma_format == 
cudaVideoChromaFormat_444; +chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444; frames_ctx->format= AV_PIX_FMT_CUDA; frames_ctx->width = (avctx->coded_width + 1) & ~1; diff --git a/libavcodec/nvdec.h b/libavcodec/nvdec.h index 85a0fcf725..09ae8c37e6 100644 --- a/libavcodec/nvdec.h +++ b/libavcodec/nvdec.h @@ -61,6 +61,8 @@ typedef struct NVDECContext { unsigned *slice_offsets; int nb_slices; unsigned int slice_offsets_allocated; + +int supports_444; } NVDECContext; int ff_nvdec_decode_init(AVCodecContext *avctx); @@ -72,7 +74,8 @@ int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size); int ff_nvdec_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx, - int dpb_size); + int dpb_size, + int supports_444); int ff_nvdec_get_ref_idx(AVFrame *frame); #endif /* AVCODEC_NVDEC_H */ diff --git a/libavcodec/nvdec_h264.c b/libavcodec/nvdec_h264.c index 25b30329d0..116bd4fb5d 100644 --- a/libavcodec/nvdec_h264.c +++ b/libavcodec/nvdec_h264.c @@ -166,7 +166,7 @@ static int nvdec_h264_frame_params(AVCodecContext *avctx, { const H264Context *h = avctx->priv_data; const SPS *sps = h->ps.sps; -return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->ref_frame_count + sps->num_reorder_frames); +return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->ref_frame_count + sps->num_reorder_frames, 0); } const AVHWAccel ff_h264_nvdec_hwaccel = { diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c index d11b5e8a38..590278ba04 100644 --- a/libavcodec/nvdec_hevc.c +++ b/libavcodec/nvdec_hevc.c @@ -299,7 +299,13 @@ static int nvdec_hevc_frame_params(AVCodecContext *avctx, { const HEVCContext *s = avctx->priv_data; const HEVCSPS *sps = s->ps.sps; -return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + 1); +return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + 1, 1); +} + +static 
int nvdec_hevc_decode_init(AVCodecContext *avctx) { +NVDECContext *ctx = avctx->internal->hwaccel_priv_data; +ctx->supports_444 = 1; +return ff_nvdec_decode_init(avctx); } const AVHWAccel ff_hevc_nvdec_hwaccel = { @@ -311,7 +317,7 @@ const AVHWAccel ff_hevc_nvdec_hwaccel = { .end_frame= ff_nvdec_end_frame,
[FFmpeg-cvslog] avcodec/hevc_ps: Expose all SPS and PPS range extension flags
ffmpeg | branch: master | Philip Langdale | Wed Feb 13 12:40:52 2019 -0800| [f4ea930a119298c6110ee4e3d24219a66e27e230] | committer: Philip Langdale avcodec/hevc_ps: Expose all SPS and PPS range extension flags We need all the flags to be exposed to be able to pass them on to HW decoders. I did not attempt to nuance any of the warnings about flags being unsupported as there's no way, at the point we extract flags, to say whether an HW decoder is being used. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f4ea930a119298c6110ee4e3d24219a66e27e230 --- libavcodec/hevc_ps.c | 19 --- libavcodec/hevc_ps.h | 4 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index ea984af0a1..80df417e4f 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1102,20 +1102,17 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, decode_vui(gb, avctx, apply_defdispwin, sps); if (get_bits1(gb)) { // sps_extension_flag -int sps_range_extension_flag = get_bits1(gb); +sps->sps_range_extension_flag = get_bits1(gb); skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7); -if (sps_range_extension_flag) { -int extended_precision_processing_flag; -int cabac_bypass_alignment_enabled_flag; - +if (sps->sps_range_extension_flag) { sps->transform_skip_rotation_enabled_flag = get_bits1(gb); sps->transform_skip_context_enabled_flag = get_bits1(gb); sps->implicit_rdpcm_enabled_flag = get_bits1(gb); sps->explicit_rdpcm_enabled_flag = get_bits1(gb); -extended_precision_processing_flag = get_bits1(gb); -if (extended_precision_processing_flag) +sps->extended_precision_processing_flag = get_bits1(gb); +if (sps->extended_precision_processing_flag) av_log(avctx, AV_LOG_WARNING, "extended_precision_processing_flag not yet implemented\n"); @@ -1127,8 +1124,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb); 
-cabac_bypass_alignment_enabled_flag = get_bits1(gb); -if (cabac_bypass_alignment_enabled_flag) +sps->cabac_bypass_alignment_enabled_flag = get_bits1(gb); +if (sps->cabac_bypass_alignment_enabled_flag) av_log(avctx, AV_LOG_WARNING, "cabac_bypass_alignment_enabled_flag not yet implemented\n"); } @@ -1686,9 +1683,9 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->slice_header_extension_present_flag = get_bits1(gb); if (get_bits1(gb)) { // pps_extension_present_flag -int pps_range_extensions_flag = get_bits1(gb); +pps->pps_range_extensions_flag = get_bits1(gb); skip_bits(gb, 7); // pps_extension_7bits -if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) { +if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) { if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0) goto err; } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 1fbda199e3..bbaa9205ef 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -284,13 +284,16 @@ typedef struct HEVCSPS { int max_transform_hierarchy_depth_inter; int max_transform_hierarchy_depth_intra; +int sps_range_extension_flag; int transform_skip_rotation_enabled_flag; int transform_skip_context_enabled_flag; int implicit_rdpcm_enabled_flag; int explicit_rdpcm_enabled_flag; +int extended_precision_processing_flag; int intra_smoothing_disabled_flag; int high_precision_offsets_enabled_flag; int persistent_rice_adaptation_enabled_flag; +int cabac_bypass_alignment_enabled_flag; ///< coded frame dimension in various units int width; @@ -365,6 +368,7 @@ typedef struct HEVCPPS { int num_extra_slice_header_bits; uint8_t slice_header_extension_present_flag; uint8_t log2_max_transform_skip_block_size; +uint8_t pps_range_extensions_flag; uint8_t cross_component_prediction_enabled_flag; uint8_t chroma_qp_offset_list_enabled_flag; uint8_t diff_cu_chroma_qp_offset_depth; ___ ffmpeg-cvslog mailing list 
ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content
ffmpeg | branch: master | Philip Langdale | Sat Oct 6 18:11:52 2018 -0700| [e06ccfbe1d33c00d6f1df202a514219c7fdb7c03] | committer: Philip Langdale avcodec/nvdec: Add support for decoding HEVC 4:4:4 content The latest generation video decoder on the Turing chips supports decoding HEVC 4:4:4. Supporting this is relatively straight-forward; we need to account for the different chroma format and pick the right output and sw formats at the right times. There was one bug which was the hard-coded assumption that the first chroma plane would be half-height; I fixed this to use the actual shift value on the plane. We also need to pass the SPS and PPS range extension flags. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e06ccfbe1d33c00d6f1df202a514219c7fdb7c03 --- libavcodec/hevcdec.c| 3 +++ libavcodec/nvdec.c | 42 ++ libavcodec/nvdec_hevc.c | 30 ++ 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index b2a87d55db..967f8f1def 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif break; case AV_PIX_FMT_YUV420P12: +case AV_PIX_FMT_YUV444P: +case AV_PIX_FMT_YUV444P10: +case AV_PIX_FMT_YUV444P12: #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index c7d5379770..72201a1123 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -35,6 +35,11 @@ #include "nvdec.h" #include "internal.h" +#if !NVDECAPI_CHECK_VERSION(9, 0) +#define cudaVideoSurfaceFormat_YUV444 2 +#define cudaVideoSurfaceFormat_YUV444_16Bit 3 +#endif + typedef struct NVDECDecoder { CUvideodecoder decoder; @@ -274,7 +279,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) CUVIDDECODECREATEINFO params = { 0 }; -int cuvid_codec_type, cuvid_chroma_format; +cudaVideoSurfaceFormat output_format; +int cuvid_codec_type, cuvid_chroma_format, chroma_444; int ret = 0; sw_desc = 
av_pix_fmt_desc_get(avctx->sw_pix_fmt); @@ -292,6 +298,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n"); return AVERROR(ENOSYS); } +chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; if (!avctx->hw_frames_ctx) { ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA); @@ -299,6 +306,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) return ret; } +switch (sw_desc->comp[0].depth) { +case 8: +output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 : + cudaVideoSurfaceFormat_NV12; +break; +case 10: +case 12: +output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit : + cudaVideoSurfaceFormat_P016; +break; +default: +av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n"); +return AVERROR(ENOSYS); +} + frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; params.ulWidth = avctx->coded_width; @@ -306,8 +328,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) params.ulTargetWidth = avctx->coded_width; params.ulTargetHeight = avctx->coded_height; params.bitDepthMinus8 = sw_desc->comp[0].depth - 8; -params.OutputFormat= params.bitDepthMinus8 ? 
- cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; +params.OutputFormat= output_format; params.CodecType = cuvid_codec_type; params.ChromaFormat= cuvid_chroma_format; params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size; @@ -386,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) NVDECFrame*cf = (NVDECFrame*)fdd->hwaccel_priv; NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data; +AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + CUVIDPROCPARAMS vpp = { 0 }; NVDECFrame *unmap_data = NULL; @@ -394,6 +417,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) unsigned int pitch, i; unsigned int offset = 0; +int shift_h = 0, shift_v = 0; int ret = 0; vpp.progressive_frame = 1; @@ -427,10 +451,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) unmap_data->idx_ref = av_buffer_ref(cf->idx_ref); unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref); +av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, _h, _v); for (i = 0; frame->linesize[i]; i++) { frame->data[i] = (uint8_t*)(devptr + offset);
[FFmpeg-cvslog] avutil/cuda_check: Make sure this passes make fate-source
ffmpeg | branch: master | Philip Langdale | Sat Nov 17 08:16:28 2018 -0800| [420ab946ace27e4b4bfb6c2be0a65a4ffd6e05a1] | committer: Philip Langdale avutil/cuda_check: Make sure this passes make fate-source The header guards were unnecessarily non-standard and the c file inclusion trick means the files don't have standard licence headers. Based on a patch by: Martin Vignali > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=420ab946ace27e4b4bfb6c2be0a65a4ffd6e05a1 --- libavutil/cuda_check.h | 6 +++--- tests/ref/fate/source | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libavutil/cuda_check.h b/libavutil/cuda_check.h index 0d45538c2f..b8e5f65cbb 100644 --- a/libavutil/cuda_check.h +++ b/libavutil/cuda_check.h @@ -17,8 +17,8 @@ */ -#ifndef FF_CUDA_CHECK_H -#define FF_CUDA_CHECK_H +#ifndef AVUTIL_CUDA_CHECK_H +#define AVUTIL_CUDA_CHECK_H /** * Wrap a CUDA function call and print error information if it fails. @@ -40,4 +40,4 @@ int ff_cuda_check(void *avctx, #define FF_CUDA_CHECK_DL(avclass, cudl, x) ff_cuda_check(avclass, cudl->cuGetErrorName, cudl->cuGetErrorString, (x), #x) -#endif /* FF_CUDA_CHECK_H */ +#endif /* AVUTIL_CUDA_CHECK_H */ diff --git a/tests/ref/fate/source b/tests/ref/fate/source index 4b9467aa77..b35f016127 100644 --- a/tests/ref/fate/source +++ b/tests/ref/fate/source @@ -1,6 +1,7 @@ Files without standard license headers: compat/avisynth/windowsPorts/basicDataTypeConversions.h compat/avisynth/windowsPorts/windows2linux.h +libavcodec/cuda_check.c libavcodec/file_open.c libavcodec/ilbcdata.h libavcodec/ilbcdec.c @@ -9,6 +10,7 @@ libavcodec/log2_tab.c libavcodec/reverse.c libavdevice/file_open.c libavdevice/reverse.c +libavfilter/cuda_check.c libavfilter/log2_tab.c libavformat/file_open.c libavformat/golomb_tab.c ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avfilter/yadif_common: Add field type tracking to help bwdif
ffmpeg | branch: master | Philip Langdale | Sun Nov 4 10:02:07 2018 -0800| [fa74e4aef2103e27424d2cfae3f142149b6a3b36] | committer: Philip Langdale avfilter/yadif_common: Add field type tracking to help bwdif The bwdif filter can use common yadif frame management if we track when a field is the first or last field in a sequence. While this information is not used by yadif, the added benefit of removing the duplicated frame management logic makes it worth tracking this state in the common code. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fa74e4aef2103e27424d2cfae3f142149b6a3b36 --- libavfilter/yadif.h| 14 ++ libavfilter/yadif_common.c | 12 +--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h index 32d6f4a0d4..c928911b35 100644 --- a/libavfilter/yadif.h +++ b/libavfilter/yadif.h @@ -41,6 +41,12 @@ enum YADIFDeint { YADIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced }; +enum YADIFCurrentField { +YADIF_FIELD_BACK_END = -1, ///< The last frame in a sequence +YADIF_FIELD_END = 0, ///< The first or last field in a sequence +YADIF_FIELD_NORMAL = 1, ///< A normal field in the middle of a sequence +}; + typedef struct YADIFContext { const AVClass *class; @@ -70,6 +76,14 @@ typedef struct YADIFContext { int eof; uint8_t *temp_line; int temp_line_size; + +/* + * An algorithm that treats first and/or last fields in a sequence + * differently can use this to detect those cases. It is the algorithm's + * responsibility to set the value to YADIF_FIELD_NORMAL after processing + * the first field. 
+ */ +int current_field; ///< YADIFCurrentField } YADIFContext; void ff_yadif_init_x86(YADIFContext *yadif); diff --git a/libavfilter/yadif_common.c b/libavfilter/yadif_common.c index 19e8ac5281..a10cf7a17f 100644 --- a/libavfilter/yadif_common.c +++ b/libavfilter/yadif_common.c @@ -44,6 +44,8 @@ static int return_frame(AVFilterContext *ctx, int is_second) av_frame_copy_props(yadif->out, yadif->cur); yadif->out->interlaced_frame = 0; +if (yadif->current_field == YADIF_FIELD_BACK_END) +yadif->current_field = YADIF_FIELD_END; } yadif->filter(ctx, yadif->out, tff ^ !is_second, tff); @@ -103,9 +105,12 @@ int ff_yadif_filter_frame(AVFilterLink *link, AVFrame *frame) yadif->cur = yadif->next; yadif->next = frame; -if (!yadif->cur && -!(yadif->cur = av_frame_clone(yadif->next))) -return AVERROR(ENOMEM); +if (!yadif->cur) { +yadif->cur = av_frame_clone(yadif->next); +if (!yadif->cur) +return AVERROR(ENOMEM); +yadif->current_field = YADIF_FIELD_END; +} if (checkstride(yadif, yadif->next, yadif->cur)) { av_log(ctx, AV_LOG_VERBOSE, "Reallocating frame due to differing stride\n"); @@ -173,6 +178,7 @@ int ff_yadif_request_frame(AVFilterLink *link) if (!next) return AVERROR(ENOMEM); +yadif->current_field = YADIF_FIELD_BACK_END; next->pts = yadif->next->pts * 2 - yadif->cur->pts; ff_yadif_filter_frame(ctx->inputs[0], next); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil/hwcontext_cuda: Define and use common CHECK_CU()
ffmpeg | branch: master | Philip Langdale | Sat Nov 10 22:47:28 2018 -0800| [19d3d0c0570981ddc8a224f07d734ff75d76e234] | committer: Philip Langdale avutil/hwcontext_cuda: Define and use common CHECK_CU() We have a pattern of wrapping CUDA calls to print errors and normalise return values that is used in a couple of places. To avoid duplication and increase consistency, let's put the wrapper implementation in a shared place and use it everywhere. Affects: * avcodec/cuviddec * avcodec/nvdec * avcodec/nvenc * avfilter/vf_scale_cuda * avfilter/vf_scale_npp * avfilter/vf_thumbnail_cuda * avfilter/vf_transpose_npp * avfilter/vf_yadif_cuda > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=19d3d0c0570981ddc8a224f07d734ff75d76e234 --- libavcodec/Makefile | 6 +- libavcodec/cuda_check.c | 1 + libavcodec/cuviddec.c | 25 +-- libavcodec/nvdec.c | 92 +++-- libavcodec/nvenc.c | 68 ++ libavfilter/Makefile| 13 ++-- libavfilter/cuda_check.c| 1 + libavfilter/vf_scale_cuda.c | 92 - libavfilter/vf_scale_npp.c | 12 ++-- libavfilter/vf_thumbnail_cuda.c | 102 +-- libavfilter/vf_transpose_npp.c | 12 ++-- libavfilter/vf_yadif_cuda.c | 97 -- libavutil/Makefile | 5 +- libavutil/cuda_check.c | 45 libavutil/cuda_check.h | 43 libavutil/hwcontext_cuda.c | 148 +++- 16 files changed, 367 insertions(+), 395 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 05be02ec7d..716f26d191 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC)+= mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o\ mpegvideoencdsp.o OBJS-$(CONFIG_MSS34DSP)+= mss34dsp.o -OBJS-$(CONFIG_NVENC) += nvenc.o +OBJS-$(CONFIG_NVENC) += nvenc.o cuda_check.o OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o OBJS-$(CONFIG_QSV) += qsv.o @@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER)+= h264dec.o h264_cabac.o h264_cavlc.o \ h264_refs.o h264_sei.o \ h264_slice.o h264data.o OBJS-$(CONFIG_H264_AMF_ENCODER)+= amfenc_h264.o 
-OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o +OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o cuda_check.o OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o @@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER) += adpcmenc.o adpcm_data.o # hardware accelerators OBJS-$(CONFIG_D3D11VA)+= dxva2.o OBJS-$(CONFIG_DXVA2) += dxva2.o -OBJS-$(CONFIG_NVDEC) += nvdec.o +OBJS-$(CONFIG_NVDEC) += nvdec.o cuda_check.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o diff --git a/libavcodec/cuda_check.c b/libavcodec/cuda_check.c new file mode 100644 index 00..a1ebb2 --- /dev/null +++ b/libavcodec/cuda_check.c @@ -0,0 +1 @@ +#include "libavutil/cuda_check.c" diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index f21273c07e..03589367ce 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -25,6 +25,7 @@ #include "libavutil/mathematics.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/fifo.h" #include "libavutil/log.h" #include "libavutil/opt.h" @@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame int is_deinterlacing; } CuvidParsedFrame; -static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) -{ -CuvidContext *ctx = avctx->priv_data; -const char *err_name; -const char *err_string; - -av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); - -if (err == CUDA_SUCCESS) -return 0; - -ctx->cudl->cuGetErrorName(err, _name); -ctx->cudl->cuGetErrorString(err, _string); - -av_log(avctx, AV_LOG_ERROR, "%s failed", func); -if (err_name && err_string) -av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); -av_log(avctx, AV_LOG_ERROR, "\n"); - -return AVERROR_EXTERNAL; -} - -#define CHECK_CU(x) check_cu(avctx, (x),
[FFmpeg-cvslog] avfilter/vf_bwdif: Use common yadif frame management logic
ffmpeg | branch: master | Philip Langdale | Sun Nov 4 10:17:01 2018 -0800| [1096614c4200fe25b22d40216148f24ebaa5192f] | committer: Philip Langdale avfilter/vf_bwdif: Use common yadif frame management logic After adding field type management to the common yadif logic, we can remove the duplicate copy of that logic from bwdif. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1096614c4200fe25b22d40216148f24ebaa5192f --- libavfilter/bwdif.h | 34 +- libavfilter/vf_bwdif.c | 235 +++- libavfilter/x86/vf_bwdif_init.c | 3 +- 3 files changed, 41 insertions(+), 231 deletions(-) diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h index 8b42c760a0..889ff772ed 100644 --- a/libavfilter/bwdif.h +++ b/libavfilter/bwdif.h @@ -21,36 +21,10 @@ #include "libavutil/pixdesc.h" #include "avfilter.h" - -enum BWDIFMode { -BWDIF_MODE_SEND_FRAME = 0, ///< send 1 frame for each frame -BWDIF_MODE_SEND_FIELD = 1, ///< send 1 frame for each field -}; - -enum BWDIFParity { -BWDIF_PARITY_TFF = 0, ///< top field first -BWDIF_PARITY_BFF = 1, ///< bottom field first -BWDIF_PARITY_AUTO = -1, ///< auto detection -}; - -enum BWDIFDeint { -BWDIF_DEINT_ALL= 0, ///< deinterlace all frames -BWDIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced -}; +#include "yadif.h" typedef struct BWDIFContext { -const AVClass *class; - -int mode; ///< BWDIFMode -int parity; ///< BWDIFParity -int deint; ///< BWDIFDeint - -int frame_pending; - -AVFrame *cur; -AVFrame *next; -AVFrame *prev; -AVFrame *out; +YADIFContext yadif; void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max); @@ -61,10 +35,6 @@ typedef struct BWDIFContext { void (*filter_edge)(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat); - -const AVPixFmtDescriptor *csp; -int inter_field; -int eof; } BWDIFContext; void ff_bwdif_init_x86(BWDIFContext *bwdif); diff --git 
a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c index b691983611..37165584cf 100644 --- a/libavfilter/vf_bwdif.c +++ b/libavfilter/vf_bwdif.c @@ -216,10 +216,11 @@ static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { BWDIFContext *s = ctx->priv; +YADIFContext *yadif = >yadif; ThreadData *td = arg; -int linesize = s->cur->linesize[td->plane]; -int clip_max = (1 << (s->csp->comp[td->plane].depth)) - 1; -int df = (s->csp->comp[td->plane].depth + 7) / 8; +int linesize = yadif->cur->linesize[td->plane]; +int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1; +int df = (yadif->csp->comp[td->plane].depth + 7) / 8; int refs = linesize / df; int slice_start = (td->h * jobnr ) / nb_jobs; int slice_end = (td->h * (jobnr+1)) / nb_jobs; @@ -227,11 +228,11 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) for (y = slice_start; y < slice_end; y++) { if ((y ^ td->parity) & 1) { -uint8_t *prev = >prev->data[td->plane][y * linesize]; -uint8_t *cur = >cur ->data[td->plane][y * linesize]; -uint8_t *next = >next->data[td->plane][y * linesize]; +uint8_t *prev = >prev->data[td->plane][y * linesize]; +uint8_t *cur = >cur ->data[td->plane][y * linesize]; +uint8_t *next = >next->data[td->plane][y * linesize]; uint8_t *dst = >frame->data[td->plane][y * td->frame->linesize[td->plane]]; -if (!s->inter_field) { +if (yadif->current_field == YADIF_FIELD_END) { s->filter_intra(dst, cur, td->w, (y + df) < td->h ? refs : -refs, y > (df - 1) ? -refs : refs, (y + 3*df) < td->h ? 
3 * refs : -refs, @@ -252,7 +253,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) } } else { memcpy(>frame->data[td->plane][y * td->frame->linesize[td->plane]], - >cur->data[td->plane][y * linesize], td->w * df); + >cur->data[td->plane][y * linesize], td->w * df); } } return 0; @@ -262,16 +263,17 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
[FFmpeg-cvslog] avcodec/nvdec: Increase frame pool size to help deinterlacing
ffmpeg | branch: release/4.1 | Philip Langdale | Wed Oct 24 18:38:44 2018 -0700| [6feec11e489b729a0ed7ead205e2aca6837d5f20] | committer: Philip Langdale avcodec/nvdec: Increase frame pool size to help deinterlacing With the cuda yadif filter in use, the number of mapped decoder frames could increase by two, as the filter holds on to additional frames. (cherry picked from commit 1b41115ef70896d9b98ce842dc5f21c465396ce2) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6feec11e489b729a0ed7ead205e2aca6837d5f20 --- libavcodec/nvdec.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 4dd6b1acf3..0426c9b319 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -601,7 +601,11 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, frames_ctx->format= AV_PIX_FMT_CUDA; frames_ctx->width = (avctx->coded_width + 1) & ~1; frames_ctx->height= (avctx->coded_height + 1) & ~1; -frames_ctx->initial_pool_size = dpb_size; +/* + * We add two extra frames to the pool to account for deinterlacing filters + * holding onto their frames. + */ +frames_ctx->initial_pool_size = dpb_size + 2; frames_ctx->free = nvdec_free_dummy; frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avfilter/vf_yadif_cuda: CUDA accelerated yadif deinterlacer
ffmpeg | branch: release/4.1 | Philip Langdale | Sun Oct 21 13:49:16 2018 -0700| [67126555fc030e465806a84084e710f20c8a4775] | committer: Philip Langdale avfilter/vf_yadif_cuda: CUDA accelerated yadif deinterlacer This is a cuda implementation of yadif, which gives us a way to do deinterlacing when using the nvdec hwaccel. In that scenario we don't have access to the nvidia deinterlacer. (cherry picked from commit d5272e94ab22bfc8f01fa3174e2c4664161ddf5a) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=67126555fc030e465806a84084e710f20c8a4775 --- Changelog| 1 + configure| 1 + doc/filters.texi | 58 ++ libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/version.h| 2 +- libavfilter/vf_yadif_cuda.c | 426 +++ libavfilter/vf_yadif_cuda.cu | 296 ++ 8 files changed, 785 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index 36c00b456a..97ea8e12c3 100644 --- a/Changelog +++ b/Changelog @@ -42,6 +42,7 @@ version 4.1: - xstack filter - pcm vidc decoder and encoder - (a)graphmonitor filter +- yadif_cuda filter version 4.0: diff --git a/configure b/configure index 01c3a1011d..5a5d0b0868 100755 --- a/configure +++ b/configure @@ -3481,6 +3481,7 @@ zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" +yadif_cuda_filter_deps="cuda_sdk" # examples avio_dir_cmd_deps="avformat avutil" diff --git a/doc/filters.texi b/doc/filters.texi index 4345a4931b..5d4bfd2e8e 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -17992,6 +17992,64 @@ Only deinterlace frames marked as interlaced. The default value is @code{all}. @end table +@section yadif_cuda + +Deinterlace the input video using the @ref{yadif} algorithm, but implemented +in CUDA so that it can work as part of a GPU accelerated pipeline with nvdec +and/or nvenc. + +It accepts the following parameters: + + +@table @option + +@item mode +The interlacing mode to adopt. 
It accepts one of the following values: + +@table @option +@item 0, send_frame +Output one frame for each frame. +@item 1, send_field +Output one frame for each field. +@item 2, send_frame_nospatial +Like @code{send_frame}, but it skips the spatial interlacing check. +@item 3, send_field_nospatial +Like @code{send_field}, but it skips the spatial interlacing check. +@end table + +The default value is @code{send_frame}. + +@item parity +The picture field parity assumed for the input interlaced video. It accepts one +of the following values: + +@table @option +@item 0, tff +Assume the top field is first. +@item 1, bff +Assume the bottom field is first. +@item -1, auto +Enable automatic detection of field parity. +@end table + +The default value is @code{auto}. +If the interlacing is unknown or the decoder does not export this information, +top field first will be assumed. + +@item deint +Specify which frames to deinterlace. Accept one of the following +values: + +@table @option +@item 0, all +Deinterlace all frames. +@item 1, interlaced +Only deinterlace frames marked as interlaced. +@end table + +The default value is @code{all}. +@end table + @section zoompan Apply Zoom & Pan effect. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index ffbcb40806..4b78b29fad 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -408,6 +408,7 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o +OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index d5a211bda5..c40c7e3a3c 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -389,6 +389,7 @@ extern AVFilter ff_vf_weave; extern AVFilter ff_vf_xbr; extern AVFilter ff_vf_xstack; extern AVFilter ff_vf_yadif; +extern AVFilter ff_vf_yadif_cuda; extern AVFilter ff_vf_zmq; extern AVFilter ff_vf_zoompan; extern AVFilter ff_vf_zscale; diff --git a/libavfilter/version.h b/libavfilter/version.h index 91c37509b4..9f0a9966eb 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -31,7 +31,7 @@ #define LIBAVFILTER_VERSION_MAJOR 7 #define LIBAVFILTER_VERSION_MINOR 40 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
[FFmpeg-cvslog] avfilter/vf_cuda_yadif: Avoid new syntax for vector initialisation
ffmpeg | branch: release/4.1 | Philip Langdale | Fri Nov 2 14:08:18 2018 -0700| [ebc1c49e417cf7d7096d7a038d1e3e61f0432f19] | committer: Philip Langdale avfilter/vf_cuda_yadif: Avoid new syntax for vector initialisation This requires a newer version of CUDA than we want to require. (cherry picked from commit 8e50215b5e02074b0773dfcf55867654ee59c179) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ebc1c49e417cf7d7096d7a038d1e3e61f0432f19 --- libavfilter/vf_yadif_cuda.cu | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_yadif_cuda.cu b/libavfilter/vf_yadif_cuda.cu index 65a902c66b..12e7e4a443 100644 --- a/libavfilter/vf_yadif_cuda.cu +++ b/libavfilter/vf_yadif_cuda.cu @@ -201,9 +201,11 @@ __inline__ __device__ void yadif_double(T *dst, T m = tex2D(cur, xo + 2, yo + 1); T n = tex2D(cur, xo + 3, yo + 1); -T spatial_pred = { -spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x, i.x, j.x, k.x, l.x, m.x, n.x), -spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y, i.y, j.y, k.y, l.y, m.y, n.y) }; +T spatial_pred; +spatial_pred.x = +spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x, i.x, j.x, k.x, l.x, m.x, n.x); +spatial_pred.y = +spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y, i.y, j.y, k.y, l.y, m.y, n.y); // Calculate temporal prediction int is_second_field = !(parity ^ tff); @@ -226,11 +228,12 @@ __inline__ __device__ void yadif_double(T *dst, T K = tex2D(next2, xo, yo - 1); T L = tex2D(next2, xo, yo + 1); -spatial_pred = { +spatial_pred.x = temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x, G.x, H.x, I.x, J.x, K.x, L.x, - spatial_pred.x, skip_spatial_check), + spatial_pred.x, skip_spatial_check); +spatial_pred.y = temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y, G.y, H.y, I.y, J.y, K.y, L.y, - spatial_pred.y, skip_spatial_check) }; + spatial_pred.y, skip_spatial_check); dst[yo*dst_pitch+xo] = spatial_pred; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org 
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] libavfilter/vf_yadif: Make frame management logic and options shareable
ffmpeg | branch: release/4.1 | Philip Langdale | Wed Oct 24 11:52:42 2018 -0700| [041231fcd632a33506689e44e5b45f29ac4ce050] | committer: Philip Langdale libavfilter/vf_yadif: Make frame management logic and options shareable I'm writing a cuda implementation of yadif, and while this obviously has a very different implementation of the actual filtering, all the frame management is unchanged. To avoid duplicating that logic, let's make it shareable. From the perspective of the existing filter, the only real change is introducing a function pointer for the filter() function so it can be specified for the specific filter. (cherry picked from commit 598f0f39271d6033588b4d8ccc672c5bdc85fec7) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=041231fcd632a33506689e44e5b45f29ac4ce050 --- libavfilter/Makefile | 2 +- libavfilter/vf_yadif.c | 196 ++ libavfilter/yadif.h| 9 ++ libavfilter/yadif_common.c | 209 + 4 files changed, 228 insertions(+), 188 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index c35cd8f422..ffbcb40806 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -407,7 +407,7 @@ OBJS-$(CONFIG_WAVEFORM_FILTER) += vf_waveform.o OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o -OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o +OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c index f58d8ac2bc..3107924932 100644 --- a/libavfilter/vf_yadif.c +++ b/libavfilter/vf_yadif.c @@ -22,7 +22,6 @@ #include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/common.h" -#include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "libavutil/imgutils.h" #include "avfilter.h" @@ -254,166 +253,6 @@ static void filter(AVFilterContext *ctx, AVFrame 
*dstpic, emms_c(); } -static int return_frame(AVFilterContext *ctx, int is_second) -{ -YADIFContext *yadif = ctx->priv; -AVFilterLink *link = ctx->outputs[0]; -int tff, ret; - -if (yadif->parity == -1) { -tff = yadif->cur->interlaced_frame ? - yadif->cur->top_field_first : 1; -} else { -tff = yadif->parity ^ 1; -} - -if (is_second) { -yadif->out = ff_get_video_buffer(link, link->w, link->h); -if (!yadif->out) -return AVERROR(ENOMEM); - -av_frame_copy_props(yadif->out, yadif->cur); -yadif->out->interlaced_frame = 0; -} - -filter(ctx, yadif->out, tff ^ !is_second, tff); - -if (is_second) { -int64_t cur_pts = yadif->cur->pts; -int64_t next_pts = yadif->next->pts; - -if (next_pts != AV_NOPTS_VALUE && cur_pts != AV_NOPTS_VALUE) { -yadif->out->pts = cur_pts + next_pts; -} else { -yadif->out->pts = AV_NOPTS_VALUE; -} -} -ret = ff_filter_frame(ctx->outputs[0], yadif->out); - -yadif->frame_pending = (yadif->mode&1) && !is_second; -return ret; -} - -static int checkstride(YADIFContext *yadif, const AVFrame *a, const AVFrame *b) -{ -int i; -for (i = 0; i < yadif->csp->nb_components; i++) -if (a->linesize[i] != b->linesize[i]) -return 1; -return 0; -} - -static void fixstride(AVFilterLink *link, AVFrame *f) -{ -AVFrame *dst = ff_default_get_video_buffer(link, f->width, f->height); -if(!dst) -return; -av_frame_copy_props(dst, f); -av_image_copy(dst->data, dst->linesize, - (const uint8_t **)f->data, f->linesize, - dst->format, dst->width, dst->height); -av_frame_unref(f); -av_frame_move_ref(f, dst); -av_frame_free(); -} - -static int filter_frame(AVFilterLink *link, AVFrame *frame) -{ -AVFilterContext *ctx = link->dst; -YADIFContext *yadif = ctx->priv; - -av_assert0(frame); - -if (yadif->frame_pending) -return_frame(ctx, 1); - -if (yadif->prev) -av_frame_free(>prev); -yadif->prev = yadif->cur; -yadif->cur = yadif->next; -yadif->next = frame; - -if (!yadif->cur && -!(yadif->cur = av_frame_clone(yadif->next))) -return AVERROR(ENOMEM); - -if (checkstride(yadif, yadif->next, 
yadif->cur)) { -av_log(ctx, AV_LOG_VERBOSE, "Reallocating
[FFmpeg-cvslog] avfilter/vf_cuda_yadif: Avoid new syntax for vector initialisation
ffmpeg | branch: master | Philip Langdale | Fri Nov 2 14:08:18 2018 -0700| [8e50215b5e02074b0773dfcf55867654ee59c179] | committer: Philip Langdale avfilter/vf_cuda_yadif: Avoid new syntax for vector initialisation This requires a newer version of CUDA than we want to require. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8e50215b5e02074b0773dfcf55867654ee59c179 --- libavfilter/vf_yadif_cuda.cu | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_yadif_cuda.cu b/libavfilter/vf_yadif_cuda.cu index 65a902c66b..12e7e4a443 100644 --- a/libavfilter/vf_yadif_cuda.cu +++ b/libavfilter/vf_yadif_cuda.cu @@ -201,9 +201,11 @@ __inline__ __device__ void yadif_double(T *dst, T m = tex2D(cur, xo + 2, yo + 1); T n = tex2D(cur, xo + 3, yo + 1); -T spatial_pred = { -spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x, i.x, j.x, k.x, l.x, m.x, n.x), -spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y, i.y, j.y, k.y, l.y, m.y, n.y) }; +T spatial_pred; +spatial_pred.x = +spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x, i.x, j.x, k.x, l.x, m.x, n.x); +spatial_pred.y = +spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y, i.y, j.y, k.y, l.y, m.y, n.y); // Calculate temporal prediction int is_second_field = !(parity ^ tff); @@ -226,11 +228,12 @@ __inline__ __device__ void yadif_double(T *dst, T K = tex2D(next2, xo, yo - 1); T L = tex2D(next2, xo, yo + 1); -spatial_pred = { +spatial_pred.x = temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x, G.x, H.x, I.x, J.x, K.x, L.x, - spatial_pred.x, skip_spatial_check), + spatial_pred.x, skip_spatial_check); +spatial_pred.y = temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y, G.y, H.y, I.y, J.y, K.y, L.y, - spatial_pred.y, skip_spatial_check) }; + spatial_pred.y, skip_spatial_check); dst[yo*dst_pitch+xo] = spatial_pred; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Increase frame pool size to help deinterlacing
ffmpeg | branch: master | Philip Langdale | Wed Oct 24 18:38:44 2018 -0700| [1b41115ef70896d9b98ce842dc5f21c465396ce2] | committer: Philip Langdale avcodec/nvdec: Increase frame pool size to help deinterlacing With the cuda yadif filter in use, the number of mapped decoder frames could increase by two, as the filter holds on to additional frames. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1b41115ef70896d9b98ce842dc5f21c465396ce2 --- libavcodec/nvdec.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 4dd6b1acf3..0426c9b319 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -601,7 +601,11 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, frames_ctx->format= AV_PIX_FMT_CUDA; frames_ctx->width = (avctx->coded_width + 1) & ~1; frames_ctx->height= (avctx->coded_height + 1) & ~1; -frames_ctx->initial_pool_size = dpb_size; +/* + * We add two extra frames to the pool to account for deinterlacing filters + * holding onto their frames. + */ +frames_ctx->initial_pool_size = dpb_size + 2; frames_ctx->free = nvdec_free_dummy; frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] libavfilter/vf_yadif: Make frame management logic and options shareable
ffmpeg | branch: master | Philip Langdale | Wed Oct 24 11:52:42 2018 -0700| [598f0f39271d6033588b4d8ccc672c5bdc85fec7] | committer: Philip Langdale libavfilter/vf_yadif: Make frame management logic and options shareable I'm writing a cuda implementation of yadif, and while this obviously has a very different implementation of the actual filtering, all the frame management is unchanged. To avoid duplicating that logic, let's make it shareable. From the perspective of the existing filter, the only real change is introducing a function pointer for the filter() function so it can be specified for the specific filter. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=598f0f39271d6033588b4d8ccc672c5bdc85fec7 --- libavfilter/Makefile | 2 +- libavfilter/vf_yadif.c | 196 ++ libavfilter/yadif.h| 9 ++ libavfilter/yadif_common.c | 209 + 4 files changed, 228 insertions(+), 188 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 51e48efc2e..4b96d805fd 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -408,7 +408,7 @@ OBJS-$(CONFIG_WAVEFORM_FILTER) += vf_waveform.o OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o -OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o +OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c index f58d8ac2bc..3107924932 100644 --- a/libavfilter/vf_yadif.c +++ b/libavfilter/vf_yadif.c @@ -22,7 +22,6 @@ #include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/common.h" -#include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "libavutil/imgutils.h" #include "avfilter.h" @@ -254,166 +253,6 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, emms_c(); } -static int return_frame(AVFilterContext *ctx, int 
is_second) -{ -YADIFContext *yadif = ctx->priv; -AVFilterLink *link = ctx->outputs[0]; -int tff, ret; - -if (yadif->parity == -1) { -tff = yadif->cur->interlaced_frame ? - yadif->cur->top_field_first : 1; -} else { -tff = yadif->parity ^ 1; -} - -if (is_second) { -yadif->out = ff_get_video_buffer(link, link->w, link->h); -if (!yadif->out) -return AVERROR(ENOMEM); - -av_frame_copy_props(yadif->out, yadif->cur); -yadif->out->interlaced_frame = 0; -} - -filter(ctx, yadif->out, tff ^ !is_second, tff); - -if (is_second) { -int64_t cur_pts = yadif->cur->pts; -int64_t next_pts = yadif->next->pts; - -if (next_pts != AV_NOPTS_VALUE && cur_pts != AV_NOPTS_VALUE) { -yadif->out->pts = cur_pts + next_pts; -} else { -yadif->out->pts = AV_NOPTS_VALUE; -} -} -ret = ff_filter_frame(ctx->outputs[0], yadif->out); - -yadif->frame_pending = (yadif->mode&1) && !is_second; -return ret; -} - -static int checkstride(YADIFContext *yadif, const AVFrame *a, const AVFrame *b) -{ -int i; -for (i = 0; i < yadif->csp->nb_components; i++) -if (a->linesize[i] != b->linesize[i]) -return 1; -return 0; -} - -static void fixstride(AVFilterLink *link, AVFrame *f) -{ -AVFrame *dst = ff_default_get_video_buffer(link, f->width, f->height); -if(!dst) -return; -av_frame_copy_props(dst, f); -av_image_copy(dst->data, dst->linesize, - (const uint8_t **)f->data, f->linesize, - dst->format, dst->width, dst->height); -av_frame_unref(f); -av_frame_move_ref(f, dst); -av_frame_free(); -} - -static int filter_frame(AVFilterLink *link, AVFrame *frame) -{ -AVFilterContext *ctx = link->dst; -YADIFContext *yadif = ctx->priv; - -av_assert0(frame); - -if (yadif->frame_pending) -return_frame(ctx, 1); - -if (yadif->prev) -av_frame_free(>prev); -yadif->prev = yadif->cur; -yadif->cur = yadif->next; -yadif->next = frame; - -if (!yadif->cur && -!(yadif->cur = av_frame_clone(yadif->next))) -return AVERROR(ENOMEM); - -if (checkstride(yadif, yadif->next, yadif->cur)) { -av_log(ctx, AV_LOG_VERBOSE, "Reallocating frame due to 
differing stride\n"); -fixstride(link, yadif
[FFmpeg-cvslog] avfilter/vf_yadif_cuda: CUDA accelerated yadif deinterlacer
ffmpeg | branch: master | Philip Langdale | Sun Oct 21 13:49:16 2018 -0700| [d5272e94ab22bfc8f01fa3174e2c4664161ddf5a] | committer: Philip Langdale avfilter/vf_yadif_cuda: CUDA accelerated yadif deinterlacer This is a cuda implementation of yadif, which gives us a way to do deinterlacing when using the nvdec hwaccel. In that scenario we don't have access to the nvidia deinterlacer. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d5272e94ab22bfc8f01fa3174e2c4664161ddf5a --- Changelog| 1 + configure| 1 + doc/filters.texi | 58 ++ libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/version.h| 2 +- libavfilter/vf_yadif_cuda.c | 426 +++ libavfilter/vf_yadif_cuda.cu | 296 ++ 8 files changed, 785 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index fe5692f93f..640ffe7c12 100644 --- a/Changelog +++ b/Changelog @@ -45,6 +45,7 @@ version 4.1: - xstack filter - pcm vidc decoder and encoder - (a)graphmonitor filter +- yadif_cuda filter version 4.0: diff --git a/configure b/configure index 2606b885b0..f3fa0cde86 100755 --- a/configure +++ b/configure @@ -3482,6 +3482,7 @@ zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" +yadif_cuda_filter_deps="cuda_sdk" # examples avio_dir_cmd_deps="avformat avutil" diff --git a/doc/filters.texi b/doc/filters.texi index e67b8a647d..e1798a53ef 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -18032,6 +18032,64 @@ Only deinterlace frames marked as interlaced. The default value is @code{all}. @end table +@section yadif_cuda + +Deinterlace the input video using the @ref{yadif} algorithm, but implemented +in CUDA so that it can work as part of a GPU accelerated pipeline with nvdec +and/or nvenc. + +It accepts the following parameters: + + +@table @option + +@item mode +The interlacing mode to adopt. It accepts one of the following values: + +@table @option +@item 0, send_frame +Output one frame for each frame. 
+@item 1, send_field +Output one frame for each field. +@item 2, send_frame_nospatial +Like @code{send_frame}, but it skips the spatial interlacing check. +@item 3, send_field_nospatial +Like @code{send_field}, but it skips the spatial interlacing check. +@end table + +The default value is @code{send_frame}. + +@item parity +The picture field parity assumed for the input interlaced video. It accepts one +of the following values: + +@table @option +@item 0, tff +Assume the top field is first. +@item 1, bff +Assume the bottom field is first. +@item -1, auto +Enable automatic detection of field parity. +@end table + +The default value is @code{auto}. +If the interlacing is unknown or the decoder does not export this information, +top field first will be assumed. + +@item deint +Specify which frames to deinterlace. Accept one of the following +values: + +@table @option +@item 0, all +Deinterlace all frames. +@item 1, interlaced +Only deinterlace frames marked as interlaced. +@end table + +The default value is @code{all}. +@end table + @section zoompan Apply Zoom & Pan effect. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 4b96d805fd..79a89a1ab1 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -409,6 +409,7 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o +OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 6052cb8c3c..484b080dea 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -390,6 +390,7 @@ extern AVFilter ff_vf_weave; extern AVFilter ff_vf_xbr; extern AVFilter ff_vf_xstack; extern AVFilter ff_vf_yadif; +extern AVFilter ff_vf_yadif_cuda; extern AVFilter ff_vf_zmq; extern AVFilter ff_vf_zoompan; extern AVFilter ff_vf_zscale; diff --git a/libavfilter/version.h b/libavfilter/version.h index 2ddbf735ea..83b18008ce 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,7 +30,7 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 7 -#define LIBAVFILTER_VERSION_MINOR 42 +#define LIBAVFILTER_VERSION_MINOR 43 #define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif
[FFmpeg-cvslog] avcodec/vdpau: Initialise driver version variable explicitly
ffmpeg | branch: master | Philip Langdale | Wed Oct 31 16:58:09 2018 -0700| [4a976200d7853588336005a394dd31d905f5caf6] | committer: Philip Langdale avcodec/vdpau: Initialise driver version variable explicitly If the identification string ever changed, we might not match anything in the sscanf(). At least have predictable behaviour. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4a976200d7853588336005a394dd31d905f5caf6 --- libavcodec/vdpau.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c index 59454e8c4b..167f06d7ae 100644 --- a/libavcodec/vdpau.c +++ b/libavcodec/vdpau.c @@ -208,7 +208,7 @@ int ff_vdpau_common_init(AVCodecContext *avctx, VdpDecoderProfile profile, return vdpau_error(status); if (avctx->codec_id == AV_CODEC_ID_HEVC && strncmp(info_string, "NVIDIA ", 7) == 0 && !(avctx->hwaccel_flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH)) { -int driver_version; +int driver_version = 0; sscanf(info_string, "NVIDIA VDPAU Driver Shared Library %d", &driver_version); if (driver_version < 410) { av_log(avctx, AV_LOG_VERBOSE, "HEVC with NVIDIA VDPAU drivers is buggy, skipping.\n"); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Push the context before destroying the decoder
ffmpeg | branch: master | Philip Langdale | Wed Oct 24 10:39:13 2018 -0700| [2d0ee127be14f44df19790d3aab5b551319f4a62] | committer: Philip Langdale avcodec/nvdec: Push the context before destroying the decoder This has no visible effects but I happened to run under the cuda memcheck tool and it called it out as an error. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2d0ee127be14f44df19790d3aab5b551319f4a62 --- libavcodec/nvdec.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index e779be3a45..4dd6b1acf3 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -149,8 +149,12 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data) { NVDECDecoder *decoder = (NVDECDecoder*)data; -if (decoder->decoder) +if (decoder->decoder) { +CUcontext dummy; +decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); decoder->cvdl->cuvidDestroyDecoder(decoder->decoder); +decoder->cudl->cuCtxPopCurrent(&dummy); +} av_buffer_unref(&decoder->hw_device_ref); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec_hevc: fix scaling lists
ffmpeg | branch: release/4.0 | Philip Langdale <phil...@overt.org> | Wed May 9 18:48:59 2018 -0700| [2a44f706aa14eefd503ca4e3c1d3aba21f991cab] | committer: Timo Rothenpieler avcodec/nvdec_hevc: fix scaling lists The main issue here was the use of [i] instead of [i * 3] for the 32x32 matrix. As part of fixing this, I changed the code to match that used in vdpau_hevc, which I spent a lot of time verifying. I also changed to calculating NumPocTotalCurr using the existing helper, which is what vdpau does. Signed-off-by: Timo Rothenpieler <t...@rothenpieler.org> (cherry picked from commit 1261003700322789d62a892e3325f8b58349d051) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a44f706aa14eefd503ca4e3c1d3aba21f991cab --- libavcodec/nvdec_hevc.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c index 008963130b..e04a701f3a 100644 --- a/libavcodec/nvdec_hevc.c +++ b/libavcodec/nvdec_hevc.c @@ -58,12 +58,13 @@ static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s) ppc->ScalingList16x16[i][j] = sl->sl[2][i][pos]; if (i < 2) -ppc->ScalingList32x32[i][j] = sl->sl[3][i][pos]; +ppc->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos]; } -} -memcpy(ppc->ScalingListDCCoeff16x16, sl->sl_dc[0], sizeof(ppc->ScalingListDCCoeff16x16)); -memcpy(ppc->ScalingListDCCoeff32x32, sl->sl_dc[1], sizeof(ppc->ScalingListDCCoeff32x32)); +ppc->ScalingListDCCoeff16x16[i] = sl->sl_dc[0][i]; +if (i < 2) +ppc->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3]; +} } static int nvdec_hevc_start_frame(AVCodecContext *avctx, @@ -166,8 +167,7 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx, .NumBitsForShortTermRPSInSlice= s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0, .NumDeltaPocsOfRefRpsIdx = s->sh.short_term_rps ? 
s->sh.short_term_rps->rps_idx_num_delta_pocs : 0, -.NumPocTotalCurr = s->rps[ST_CURR_BEF].nb_refs + s->rps[ST_CURR_AFT].nb_refs + - s->rps[LT_CURR].nb_refs, +.NumPocTotalCurr = ff_hevc_frame_nb_refs(s), .NumPocStCurrBefore = s->rps[ST_CURR_BEF].nb_refs, .NumPocStCurrAfter= s->rps[ST_CURR_AFT].nb_refs, .NumPocLtCurr = s->rps[LT_CURR].nb_refs, ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec_hevc: fix scaling lists
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed May 9 18:48:59 2018 -0700| [1261003700322789d62a892e3325f8b58349d051] | committer: Timo Rothenpieler avcodec/nvdec_hevc: fix scaling lists The main issue here was the use of [i] instead of [i * 3] for the 32x32 matrix. As part of fixing this, I changed the code to match that used in vdpau_hevc, which I spent a lot of time verifying. I also changed to calculating NumPocTotalCurr using the existing helper, which is what vdpau does. Signed-off-by: Timo Rothenpieler <t...@rothenpieler.org> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1261003700322789d62a892e3325f8b58349d051 --- libavcodec/nvdec_hevc.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c index 008963130b..e04a701f3a 100644 --- a/libavcodec/nvdec_hevc.c +++ b/libavcodec/nvdec_hevc.c @@ -58,12 +58,13 @@ static void fill_scaling_lists(CUVIDHEVCPICPARAMS *ppc, const HEVCContext *s) ppc->ScalingList16x16[i][j] = sl->sl[2][i][pos]; if (i < 2) -ppc->ScalingList32x32[i][j] = sl->sl[3][i][pos]; +ppc->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos]; } -} -memcpy(ppc->ScalingListDCCoeff16x16, sl->sl_dc[0], sizeof(ppc->ScalingListDCCoeff16x16)); -memcpy(ppc->ScalingListDCCoeff32x32, sl->sl_dc[1], sizeof(ppc->ScalingListDCCoeff32x32)); +ppc->ScalingListDCCoeff16x16[i] = sl->sl_dc[0][i]; +if (i < 2) +ppc->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3]; +} } static int nvdec_hevc_start_frame(AVCodecContext *avctx, @@ -166,8 +167,7 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx, .NumBitsForShortTermRPSInSlice= s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0, .NumDeltaPocsOfRefRpsIdx = s->sh.short_term_rps ? 
s->sh.short_term_rps->rps_idx_num_delta_pocs : 0, -.NumPocTotalCurr = s->rps[ST_CURR_BEF].nb_refs + s->rps[ST_CURR_AFT].nb_refs + - s->rps[LT_CURR].nb_refs, +.NumPocTotalCurr = ff_hevc_frame_nb_refs(s), .NumPocStCurrBefore = s->rps[ST_CURR_BEF].nb_refs, .NumPocStCurrAfter= s->rps[ST_CURR_AFT].nb_refs, .NumPocLtCurr = s->rps[LT_CURR].nb_refs, ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] swscale: Introduce a helper to identify semi-planar formats
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Fri Mar 2 09:09:05 2018 -0800| [dd3f1e3a1186ac2cd3d4ce71944626a2c3f200f1] | committer: Philip Langdale swscale: Introduce a helper to identify semi-planar formats This cleans up the ever-more-unreadable list of semi-planar exclusions for selecting the planar copy wrapper. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dd3f1e3a1186ac2cd3d4ce71944626a2c3f200f1 --- libswscale/swscale_internal.h | 11 +++ libswscale/swscale_unscaled.c | 7 +-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 0f51df95d7..c9120d8f5f 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -676,6 +676,17 @@ static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt)); } +/* + * Identity semi-planar YUV formats. Specifically, those are YUV formats + * where the second and third components (U & V) are on the same plane. 
+ */ +static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt) +{ +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); +av_assert0(desc); +return (isPlanarYUV(pix_fmt) && desc->comp[1].plane == desc->comp[2].plane); +} + static av_always_inline int isRGB(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 766c9b4872..13f9cd83e3 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1930,12 +1930,7 @@ void ff_get_unscaled_swscale(SwsContext *c) (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) && c->chrDstHSubSample == c->chrSrcHSubSample && c->chrDstVSubSample == c->chrSrcVSubSample && - dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && - dstFormat != AV_PIX_FMT_P010LE && dstFormat != AV_PIX_FMT_P010BE && - dstFormat != AV_PIX_FMT_P016LE && dstFormat != AV_PIX_FMT_P016BE && - srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21 && - srcFormat != AV_PIX_FMT_P010LE && srcFormat != AV_PIX_FMT_P010BE && - srcFormat != AV_PIX_FMT_P016LE && srcFormat != AV_PIX_FMT_P016BE)) + !isSemiPlanarYUV(srcFormat) && !isSemiPlanarYUV(dstFormat))) { if (isPacked(c->srcFormat)) c->swscale = packedCopyWrapper; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/hevcdec: Declare that nvdec supports 12bit decoding
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Feb 25 09:44:23 2018 -0800| [e990713ff9e39418318b2ca8dd8ab432e5e55c7c] | committer: Philip Langdale avcodec/hevcdec: Declare that nvdec supports 12bit decoding > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e990713ff9e39418318b2ca8dd8ab432e5e55c7c --- libavcodec/hevcdec.c | 5 + libavcodec/version.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index fc4eb781dc..c8877626d2 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -408,6 +408,11 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) *fmt++ = AV_PIX_FMT_CUDA; #endif break; +case AV_PIX_FMT_YUV420P12: +#if CONFIG_HEVC_NVDEC_HWACCEL +*fmt++ = AV_PIX_FMT_CUDA; +#endif +break; } *fmt++ = sps->pix_fmt; diff --git a/libavcodec/version.h b/libavcodec/version.h index ca18ce6e8b..1e84410d68 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 13 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] swscale: Add p016 output support and generalise yuv420p1x to p010
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Thu Mar 1 20:16:48 2018 -0800| [9d5aff09a7163b17ec98f8c712ddde7727372dbc] | committer: Philip Langdale swscale: Add p016 output support and generalise yuv420p1x to p010 To make the best use of existing code, I generalised the wrapper that currently does yuv420p10 to p010 to support any mixture of input and output sizes between 10 and 16 bits. This had the side effect of yielding a working code path for all yuv420p1x formats to p01x. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d5aff09a7163b17ec98f8c712ddde7727372dbc --- libswscale/output.c | 31 + libswscale/swscale_unscaled.c | 35 +++-- libswscale/utils.c | 4 ++-- libswscale/version.h| 2 +- tests/ref/fate/filter-pixdesc-p016be| 1 + tests/ref/fate/filter-pixdesc-p016le| 1 + tests/ref/fate/filter-pixfmts-copy | 2 ++ tests/ref/fate/filter-pixfmts-crop | 2 ++ tests/ref/fate/filter-pixfmts-field | 2 ++ tests/ref/fate/filter-pixfmts-hflip | 2 ++ tests/ref/fate/filter-pixfmts-il| 2 ++ tests/ref/fate/filter-pixfmts-null | 2 ++ tests/ref/fate/filter-pixfmts-pad | 1 + tests/ref/fate/filter-pixfmts-scale | 2 ++ tests/ref/fate/filter-pixfmts-transpose | 2 ++ tests/ref/fate/filter-pixfmts-vflip | 2 ++ 16 files changed, 80 insertions(+), 13 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index f30bce8dd3..0af2fffea4 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -180,6 +180,34 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, } } +static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, + const int16_t **chrUSrc, const int16_t **chrVSrc, + uint8_t *dest8, int chrDstW) +{ +uint16_t *dest = (uint16_t*)dest8; +const int32_t **uSrc = (const int32_t **)chrUSrc; +const int32_t **vSrc = (const int32_t **)chrVSrc; +int shift = 15; +int big_endian = c->dstFormat == AV_PIX_FMT_P016BE; +int i, j; + +for (i = 0; i < chrDstW; i++) { +int u = 1 << (shift - 1); +int v = 1 << (shift - 
1); + +/* See yuv2planeX_16_c_template for details. */ +u -= 0x4000; +v -= 0x4000; +for (j = 0; j < chrFilterSize; j++) { +u += uSrc[j][i] * (unsigned)chrFilter[j]; +v += vSrc[j][i] * (unsigned)chrFilter[j]; +} + +output_pixel([2*i] , u, 0x8000, int); +output_pixel([2*i+1], v, 0x8000, int); +} +} + #undef output_pixel #define output_pixel(pos, val) \ @@ -2257,6 +2285,9 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } else if (is16BPS(dstFormat)) { *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c; *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c; +if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE) { + *yuv2nv12cX = yuv2p016cX_c; +} } else if (isNBPS(dstFormat)) { if (desc->comp[0].depth == 9) { *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 5ec2116bcf..766c9b4872 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -180,16 +180,28 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } -static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[], +static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dstParam8[], int dstStride[]) { +const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat); +const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat); const uint16_t **src = (const uint16_t**)src8; uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY); uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2); int x, y; +/* Calculate net shift required for values. 
*/ +const int shift[3] = { +dst_format->comp[0].depth + dst_format->comp[0].shift - +src_format->comp[0].depth - src_format->comp[0].shift, +dst_format->comp[1].depth + dst_format->comp[1].shift - +src_format->comp[1].depth - src_format->comp[1].shift, +dst_format->comp[2].depth + dst_format->comp[2].shift - +src_format->comp[2].depth - src_format->comp[2].shift, +}; + av_assert0(!(srcStride[0] % 2 || srcStride[1] % 2 || srcStride[2] % 2 ||
[FFmpeg-cvslog] avcodec/nvenc: Declare support for P016
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Feb 25 09:08:06 2018 -0800| [6a89cdc474bc1a2a276dff63629784d68b9cab6e] | committer: Philip Langdale avcodec/nvenc: Declare support for P016 nvenc doesn't support P016, but we have two problems today: 1) We declare support for YUV444P16 which nvenc also doesn't support. We do this because it's the only pix_fmt we have that can approximate nvenc's internal format that is YUV444P10 with data in MSBs instead of LSBs. Because the declared format is a 16bit one, it will be preferrentially chosen when encoding >10bit content, but that content will normally be YUV420P12 or P016 which should get mapped to P010 and not YUV444P10. 2) Transcoding P016 content with nvenc should be possible in a pure hardware pipeline, and that can't be done if nvenc doesn't say it accepts P016. By mapping it to P010, we can use it, albeit with truncation. I have established that swscale doesn't know how to dither to 10bits so we'd get truncation anyway, even if we tried to do this 'properly'. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a89cdc474bc1a2a276dff63629784d68b9cab6e --- libavcodec/nvenc.c | 5 - libavcodec/version.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 39c3aa1fbb..1f601a63bd 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -41,7 +41,8 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = { AV_PIX_FMT_NV12, AV_PIX_FMT_P010, AV_PIX_FMT_YUV444P, -AV_PIX_FMT_YUV444P16, +AV_PIX_FMT_P016, // Truncated to 10bits +AV_PIX_FMT_YUV444P16, // Truncated to 10bits AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_CUDA, @@ -52,6 +53,7 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = { }; #define IS_10BIT(pix_fmt) (pix_fmt == AV_PIX_FMT_P010|| \ +pix_fmt == AV_PIX_FMT_P016|| \ pix_fmt == AV_PIX_FMT_YUV444P16) #define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P || \ @@ -1219,6 +1221,7 @@ static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt) case AV_PIX_FMT_NV12: return NV_ENC_BUFFER_FORMAT_NV12_PL; case AV_PIX_FMT_P010: +case AV_PIX_FMT_P016: return NV_ENC_BUFFER_FORMAT_YUV420_10BIT; case AV_PIX_FMT_YUV444P: return NV_ENC_BUFFER_FORMAT_YUV444_PL; diff --git a/libavcodec/version.h b/libavcodec/version.h index 1e84410d68..d8256bff21 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 58 #define LIBAVCODEC_VERSION_MINOR 13 -#define LIBAVCODEC_VERSION_MICRO 101 +#define LIBAVCODEC_VERSION_MICRO 102 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Implement mjpeg nvdec hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Feb 18 15:53:33 2018 -0800| [cd98f20b4abac8241ef7f922eb85ba6fe3fe230b] | committer: Mark Thompson avcodec/nvdec: Implement mjpeg nvdec hwaccel > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cd98f20b4abac8241ef7f922eb85ba6fe3fe230b --- Changelog| 2 +- configure| 2 ++ libavcodec/Makefile | 1 + libavcodec/hwaccels.h| 1 + libavcodec/mjpegdec.c| 6 libavcodec/nvdec.c | 1 + libavcodec/nvdec_mjpeg.c | 86 libavcodec/version.h | 2 +- 8 files changed, 99 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index 441ee1cd29..f7afd2f9ed 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX and aptX HD encoder and decoder - Raw aptX and aptX HD muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1, VP8/9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, MJPEG, MPEG-1/2/4, VC1, VP8/9 hwaccel decoding - Intel QSV-accelerated overlay filter - mcompand audio filter - acontrast audio filter diff --git a/configure b/configure index 9b2468516a..d40718f842 100755 --- a/configure +++ b/configure @@ -2714,6 +2714,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" hevc_vdpau_hwaccel_select="hevc_decoder" hevc_videotoolbox_hwaccel_deps="videotoolbox" hevc_videotoolbox_hwaccel_select="hevc_decoder" +mjpeg_nvdec_hwaccel_deps="nvdec" +mjpeg_nvdec_hwaccel_select="mjpeg_decoder" mjpeg_vaapi_hwaccel_deps="vaapi" mjpeg_vaapi_hwaccel_select="mjpeg_decoder" mpeg_xvmc_hwaccel_deps="xvmc" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index de52bc2094..c7a5692421 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -854,6 +854,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec_h2645.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o +OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)+= 
nvdec_mjpeg.o OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL)+= vaapi_mjpeg.o OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL)+= nvdec_mpeg12.o OBJS-$(CONFIG_MPEG1_VDPAU_HWACCEL)+= vdpau_mpeg12.o diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 420e2feeea..7d73da8676 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -37,6 +37,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel; extern const AVHWAccel ff_hevc_vaapi_hwaccel; extern const AVHWAccel ff_hevc_vdpau_hwaccel; extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +extern const AVHWAccel ff_mjpeg_nvdec_hwaccel; extern const AVHWAccel ff_mjpeg_vaapi_hwaccel; extern const AVHWAccel ff_mpeg1_nvdec_hwaccel; extern const AVHWAccel ff_mpeg1_vdpau_hwaccel; diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index 939f2849d0..beef174618 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -650,6 +650,9 @@ unk_pixfmt: s->avctx->pix_fmt = s->hwaccel_pix_fmt; } else { enum AVPixelFormat pix_fmts[] = { +#if CONFIG_MJPEG_NVDEC_HWACCEL +AV_PIX_FMT_CUDA, +#endif #if CONFIG_MJPEG_VAAPI_HWACCEL AV_PIX_FMT_VAAPI, #endif @@ -2780,6 +2783,9 @@ AVCodec ff_mjpeg_decoder = { .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM, .hw_configs = (const AVCodecHWConfigInternal*[]) { +#if CONFIG_MJPEG_NVDEC_HWACCEL +HWACCEL_NVDEC(mjpeg), +#endif #if CONFIG_MJPEG_VAAPI_HWACCEL HWACCEL_VAAPI(mjpeg), #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index e9e6ea0f8b..ab3cb88b27 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id) switch (id) { case AV_CODEC_ID_H264: return cudaVideoCodec_H264; case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; +case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG; case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1; case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2; case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4; diff --git a/libavcodec/nvdec_mjpeg.c 
b/libavcodec/nvdec_mjpeg.c new file mode 100644 index 00..7e404246ce --- /dev/null +++ b/libavcodec/nvdec_mjpeg.c @@ -0,0 +1,86 @@ +/* + * MJPEG HW decode acceleration through NVDEC + * + * Copyright (c) 2017 Philip Langdale + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the Lice
[FFmpeg-cvslog] avcodec/nvdec: More effort to make vp8 compile with gcc < 4.6
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 26 22:18:22 2017 -0800| [0e93694e64cdc72f7ccb8a986171593e672b8dba] | committer: Philip Langdale avcodec/nvdec: More effort to make vp8 compile with gcc < 4.6 I'm told my prefix work-around wasn't enough to make it compile, although I'm not sure why; I did some basic testing and that approach appeared to work, but I'm not in a position to do a full compile on CentOS 6 so I can't be sure of anything. I have had it confirmed that the additional change to not use named initialisers is enough to make it compile, so let's throw that into the mix too. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0e93694e64cdc72f7ccb8a986171593e672b8dba --- libavcodec/nvdec_vp8.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c index f29a66ac72..7b37445613 100644 --- a/libavcodec/nvdec_vp8.c +++ b/libavcodec/nvdec_vp8.c @@ -65,20 +65,19 @@ static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t *buffer, u .GoldenRefIdx= safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN]), .AltRefIdx = safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN2]), /* - * Explicit braces for anonymous inners to work around limitations - * in ancient versions of gcc. + * Explicit braces for anonymous inners and unnamed fields + * to work around limitations in ancient versions of gcc. */ -{ -{ -.frame_type = !h->keyframe, -.version = h->profile, -.show_frame = !h->invisible, -.update_mb_segmentation_data = h->segmentation.enabled ? - h->segmentation.update_feature_data : - 0, +{ // union +{ // struct +!h->keyframe, // frame_type +h->profile, // version +!h->invisible,// show_frame +h->segmentation.enabled ? // update_mb_segmentation_data +h->segmentation.update_feature_data : 0, } } - } +} }; return 0; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Make vp8 initialisation more 'compatible'
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 26 20:40:32 2017 -0800| [b93d96a07be40f8e5d267d55fe961285586c0fd7] | committer: Philip Langdale avcodec/nvdec: Make vp8 initialisation more 'compatible' Ancient versions of gcc (pre 4.6) can't directly initialise members of anonymous inner unions/structs by name. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676 Unfortunately, RHEL 6 shipped with one of these ancient versions and so we're stuck with it until approximately the heat death of the universe. Putting explicit braces into the initialisation is possibly a work-around but the behaviour there was never fully understood before direct initialisation was made to work. So, this may or may not work. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b93d96a07be40f8e5d267d55fe961285586c0fd7 --- libavcodec/nvdec_vp8.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c index ceb3de2c3a..f29a66ac72 100644 --- a/libavcodec/nvdec_vp8.c +++ b/libavcodec/nvdec_vp8.c @@ -64,11 +64,20 @@ static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t *buffer, u .LastRefIdx = safe_get_ref_idx(h->framep[VP56_FRAME_PREVIOUS]), .GoldenRefIdx= safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN]), .AltRefIdx = safe_get_ref_idx(h->framep[VP56_FRAME_GOLDEN2]), - -.frame_type = !h->keyframe, -.version = h->profile, -.show_frame = !h->invisible, -.update_mb_segmentation_data = h->segmentation.enabled ? h->segmentation.update_feature_data : 0, +/* + * Explicit braces for anonymous inners to work around limitations + * in ancient versions of gcc. + */ +{ +{ +.frame_type = !h->keyframe, +.version = h->profile, +.show_frame = !h->invisible, +.update_mb_segmentation_data = h->segmentation.enabled ? + h->segmentation.update_feature_data : + 0, +} +} } }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvdec: Implement vp8 hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 19 08:42:39 2017 -0800| [1da9851e3470a019a35793c913a1de07f75dc3fc] | committer: Philip Langdale avcodec/nvdec: Implement vp8 hwaccel > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1da9851e3470a019a35793c913a1de07f75dc3fc --- Changelog | 2 +- configure | 2 ++ libavcodec/Makefile| 1 + libavcodec/hwaccels.h | 1 + libavcodec/nvdec.c | 1 + libavcodec/nvdec_vp8.c | 97 ++ libavcodec/version.h | 2 +- libavcodec/vp8.c | 6 8 files changed, 110 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index e3092e211f..afead72f14 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX encoder and decoder - Raw aptX muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1, VP8/9 hwaccel decoding - Intel QSV-accelerated overlay filter - mcompand audio filter - acontrast audio filter diff --git a/configure b/configure index fbff9c..6748ef8bc9 100755 --- a/configure +++ b/configure @@ -2746,6 +2746,8 @@ vc1_vaapi_hwaccel_deps="vaapi" vc1_vaapi_hwaccel_select="vc1_decoder" vc1_vdpau_hwaccel_deps="vdpau" vc1_vdpau_hwaccel_select="vc1_decoder" +vp8_nvdec_hwaccel_deps="nvdec" +vp8_nvdec_hwaccel_select="vp8_decoder" vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8" vp8_vaapi_hwaccel_select="vp8_decoder" vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 0ebd2820eb..a6203d424b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -871,6 +871,7 @@ OBJS-$(CONFIG_VC1_NVDEC_HWACCEL) += nvdec_vc1.o OBJS-$(CONFIG_VC1_QSV_HWACCEL)+= qsvdec_other.o OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o +OBJS-$(CONFIG_VP8_NVDEC_HWACCEL) += nvdec_vp8.o OBJS-$(CONFIG_VP8_VAAPI_HWACCEL) += vaapi_vp8.o 
OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)+= dxva2_vp9.o OBJS-$(CONFIG_VP9_DXVA2_HWACCEL) += dxva2_vp9.o diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index afe7289341..fcfe4e088e 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -59,6 +59,7 @@ extern const AVHWAccel ff_vc1_dxva2_hwaccel; extern const AVHWAccel ff_vc1_nvdec_hwaccel; extern const AVHWAccel ff_vc1_vaapi_hwaccel; extern const AVHWAccel ff_vc1_vdpau_hwaccel; +extern const AVHWAccel ff_vp8_nvdec_hwaccel; extern const AVHWAccel ff_vp8_vaapi_hwaccel; extern const AVHWAccel ff_vp9_d3d11va_hwaccel; extern const AVHWAccel ff_vp9_d3d11va2_hwaccel; diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index da4451a739..c7a02ff40f 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -58,6 +58,7 @@ static int map_avcodec_id(enum AVCodecID id) case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2; case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4; case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1; +case AV_CODEC_ID_VP8:return cudaVideoCodec_VP8; case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9; case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1; } diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c new file mode 100644 index 00..ceb3de2c3a --- /dev/null +++ b/libavcodec/nvdec_vp8.c @@ -0,0 +1,97 @@ +/* + * VP8 HW decode acceleration through NVDEC + * + * Copyright (c) 2017 Philip Langdale + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "nvdec.h" +#include "decode.h" +#include "internal.h" +#include "vp8.h" + +static unsigned char safe_get_ref_idx(VP8Frame *frame) +{ +return frame ? ff_nvdec_get_ref_idx(frame->tf.f) : 255; +} + +static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ +VP8Context *h = avctx->priv_data
[FFmpeg-cvslog] avcodec/nvdec: Round up odd width/height values
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Fri Nov 24 10:05:49 2017 -0800| [4186a77f26a82b4e15109d3286d6de98c2cfe7b6] | committer: Philip Langdale avcodec/nvdec: Round up odd width/height values nvdec will not produce odd width/height output, and while this is basically never an issue with most codecs, due to internal alignment requirements, you can get odd sized jpegs. If an odd-sized jpeg is encountered, nvdec will actually round down internally and produce output that is slightly smaller. This isn't the end of the world, as long as you know the output size doesn't match the original image resolution. However, with an hwaccel, we don't know. The decoder controls the reported output size and the hwaccel cannot change it. I was able to trigger an error in mpv where it tries to copy the output surface as part of rendering and triggers a cuda error because cuda knows the output frame is smaller than expected. To fix this, we can round up the configured width/height passed to nvdec so that the frames are always at least as large as the decoder's reported size, and data can be copied out safely. In this particular jpeg case, you end up with a blank (green) line at the bottom due to nvdec refusing to decode the last line, but the behaviour matches cuviddec, so it's as good as you're going to get. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4186a77f26a82b4e15109d3286d6de98c2cfe7b6 --- libavcodec/nvdec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 0a39927031..da4451a739 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -532,8 +532,8 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, } frames_ctx->format= AV_PIX_FMT_CUDA; -frames_ctx->width = avctx->coded_width; -frames_ctx->height= avctx->coded_height; +frames_ctx->width = (avctx->coded_width + 1) & ~1; +frames_ctx->height= (avctx->coded_height + 1) & ~1; frames_ctx->initial_pool_size = dpb_size; switch (sw_desc->comp[0].depth) { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec: Refactor common nvdec hwaccel logic
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sat Nov 18 20:29:15 2017 -0800| [4c7b023d56e09a78a587d036db1b64bf7c493b3d] | committer: Philip Langdale avcodec: Refactor common nvdec hwaccel logic The 'simple' hwaccels (not h.264 and hevc) all use the same bitstream management and reference lookup logic so let's refactor all that into common functions. I verified that casting a signed int -1 to unsigned char produces 255 according to the C language specification. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4c7b023d56e09a78a587d036db1b64bf7c493b3d --- libavcodec/nvdec.c| 46 +++ libavcodec/nvdec.h| 4 libavcodec/nvdec_mpeg12.c | 53 - libavcodec/nvdec_vc1.c| 55 ++- libavcodec/nvdec_vp9.c| 53 + 5 files changed, 65 insertions(+), 146 deletions(-) diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 3d62840e9f..97ff605f0f 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -475,6 +475,36 @@ finish: return ret; } +int ff_nvdec_simple_end_frame(AVCodecContext *avctx) +{ +NVDECContext *ctx = avctx->internal->hwaccel_priv_data; +int ret = ff_nvdec_end_frame(avctx); +ctx->bitstream = NULL; +return ret; +} + +int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, + uint32_t size) +{ +NVDECContext *ctx = avctx->internal->hwaccel_priv_data; +void *tmp; + +tmp = av_fast_realloc(ctx->slice_offsets, >slice_offsets_allocated, + (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets)); +if (!tmp) +return AVERROR(ENOMEM); +ctx->slice_offsets = tmp; + +if (!ctx->bitstream) +ctx->bitstream = (uint8_t*)buffer; + +ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream; +ctx->bitstream_len += size; +ctx->nb_slices++; + +return 0; +} + int ff_nvdec_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx, int dpb_size) @@ -520,3 +550,19 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, return 0; } + +int ff_nvdec_get_ref_idx(AVFrame *frame) +{ +FrameDecodeData *fdd; +NVDECFrame *cf; + +if (!frame || 
!frame->private_ref) +return -1; + +fdd = (FrameDecodeData*)frame->private_ref->data; +cf = (NVDECFrame*)fdd->hwaccel_priv; +if (!cf) +return -1; + +return cf->idx; +} diff --git a/libavcodec/nvdec.h b/libavcodec/nvdec.h index 14d29ee94b..90578d5a1c 100644 --- a/libavcodec/nvdec.h +++ b/libavcodec/nvdec.h @@ -58,8 +58,12 @@ int ff_nvdec_decode_init(AVCodecContext *avctx); int ff_nvdec_decode_uninit(AVCodecContext *avctx); int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame); int ff_nvdec_end_frame(AVCodecContext *avctx); +int ff_nvdec_simple_end_frame(AVCodecContext *avctx); +int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, + uint32_t size); int ff_nvdec_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx, int dpb_size); +int ff_nvdec_get_ref_idx(AVFrame *frame); #endif /* AVCODEC_NVDEC_H */ diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c index 127e843d85..db9cebeddd 100644 --- a/libavcodec/nvdec_mpeg12.c +++ b/libavcodec/nvdec_mpeg12.c @@ -25,22 +25,6 @@ #include "nvdec.h" #include "decode.h" -static int get_ref_idx(AVFrame *frame) -{ -FrameDecodeData *fdd; -NVDECFrame *cf; - -if (!frame || !frame->private_ref) -return -1; - -fdd = (FrameDecodeData*)frame->private_ref->data; -cf = (NVDECFrame*)fdd->hwaccel_priv; -if (!cf) -return -1; - -return cf->idx; -} - static int nvdec_mpeg12_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) { MpegEncContext *s = avctx->priv_data; @@ -71,8 +55,8 @@ static int nvdec_mpeg12_start_frame(AVCodecContext *avctx, const uint8_t *buffer s->pict_type == AV_PICTURE_TYPE_P, .CodecSpecific.mpeg2 = { -.ForwardRefIdx = get_ref_idx(s->last_picture.f), -.BackwardRefIdx= get_ref_idx(s->next_picture.f), +.ForwardRefIdx = ff_nvdec_get_ref_idx(s->last_picture.f), +.BackwardRefIdx= ff_nvdec_get_ref_idx(s->next_picture.f), .picture_coding_type= s->pict_type, .full_pel_forward_vector= s->full_pel[0], @@ -99,35 +83,6 @@ static int 
nvdec_mpeg12_start_frame(AVCodecContext *avctx, const uint8_t *buffer return 0; } -static int nvdec_mpeg12_end_frame(AVCodecContext *avctx
[FFmpeg-cvslog] avcodec: Implement mpeg1 nvdec hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Thu Nov 16 07:38:10 2017 -0800| [8bca292c3045a7f372299f71c8d88b541fe2a816] | committer: Philip Langdale avcodec: Implement mpeg1 nvdec hwaccel Once I remembered that there's a separate decoder type for mpeg1, even though params struct is shared with mpeg2, everything worked. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8bca292c3045a7f372299f71c8d88b541fe2a816 --- Changelog | 2 +- configure | 2 ++ libavcodec/Makefile | 1 + libavcodec/allcodecs.c| 1 + libavcodec/mpeg12dec.c| 3 +++ libavcodec/nvdec.c| 1 + libavcodec/nvdec_mpeg12.c | 18 +- libavcodec/version.h | 2 +- 8 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Changelog b/Changelog index e1f8a648b0..5a9d183aed 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX encoder and decoder - Raw aptX muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-2, VC1 and VP9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2, VC1 and VP9 hwaccel decoding - Intel QSV-accelerated overlay filter - mcompand audio filter - acontrast audio filter diff --git a/configure b/configure index 8262358138..7ac9a8d391 100755 --- a/configure +++ b/configure @@ -2704,6 +2704,8 @@ mjpeg_cuvid_hwaccel_select="mjpeg_cuvid_decoder" mpeg_xvmc_hwaccel_deps="xvmc" mpeg_xvmc_hwaccel_select="mpeg2video_decoder" mpeg1_cuvid_hwaccel_select="mpeg1_cuvid_decoder" +mpeg1_nvdec_hwaccel_deps="nvdec" +mpeg1_nvdec_hwaccel_select="mpeg1video_decoder" mpeg1_vdpau_hwaccel_deps="vdpau" mpeg1_vdpau_hwaccel_select="mpeg1video_decoder" mpeg1_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 494c76da76..0573454c7b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -849,6 +849,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec_h2645.o 
OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o +OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL)+= nvdec_mpeg12.o OBJS-$(CONFIG_MPEG1_VDPAU_HWACCEL)+= vdpau_mpeg12.o OBJS-$(CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_MPEG1_XVMC_HWACCEL) += mpegvideo_xvmc.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index e0adb71951..e9df7049de 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -87,6 +87,7 @@ static void register_all(void) REGISTER_HWACCEL(HEVC_VIDEOTOOLBOX, hevc_videotoolbox); REGISTER_HWACCEL(MJPEG_CUVID, mjpeg_cuvid); REGISTER_HWACCEL(MPEG1_CUVID, mpeg1_cuvid); +REGISTER_HWACCEL(MPEG1_NVDEC, mpeg1_nvdec); REGISTER_HWACCEL(MPEG1_XVMC,mpeg1_xvmc); REGISTER_HWACCEL(MPEG1_VDPAU, mpeg1_vdpau); REGISTER_HWACCEL(MPEG1_VIDEOTOOLBOX, mpeg1_videotoolbox); diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 2b213eebcd..5a51d09bb0 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -1130,6 +1130,9 @@ static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm, } static const enum AVPixelFormat mpeg1_hwaccel_pixfmt_list_420[] = { +#if CONFIG_MPEG1_NVDEC_HWACCEL +AV_PIX_FMT_CUDA, +#endif #if CONFIG_MPEG1_XVMC_HWACCEL AV_PIX_FMT_XVMC, #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 97ff605f0f..d5cf1058cb 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id) switch (id) { case AV_CODEC_ID_H264: return cudaVideoCodec_H264; case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; +case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1; case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2; case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1; case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9; diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c index db9cebeddd..e29d085a79 100644 --- a/libavcodec/nvdec_mpeg12.c +++ b/libavcodec/nvdec_mpeg12.c @@ -1,5 
+1,5 @@ /* - * MPEG-2 HW decode acceleration through NVDEC + * MPEG-1/2 HW decode acceleration through NVDEC * * Copyright (c) 2017 Philip Langdale * @@ -105,3 +105,19 @@ AVHWAccel ff_mpeg2_nvdec_hwaccel = { .priv_data_size = sizeof(NVDECContext), }; #endif + +#if CONFIG_MPEG1_NVDEC_HWACCEL +AVHWAccel ff_mpeg1_nvdec_hwaccel = { +.name = "mpeg1_nvdec", +.type = AVMEDIA_TYPE_VIDEO, +.id = AV_CODEC_ID_MPEG1VIDEO, +.pix_fmt = AV_PIX_FMT_CUDA, +.start_frame = nvdec_mpeg12_start_frame, +
[FFmpeg-cvslog] avcodec: Implement mpeg4 nvdec hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 15 20:59:29 2017 -0800| [6b77a10e43e1a8ed8ead20f344485d400440055c] | committer: Philip Langdale avcodec: Implement mpeg4 nvdec hwaccel This was predictably nightmarish, given how ridiculous mpeg4 is. I had to stare at the cuvid parser output for a long time to work out what each field was supposed to be, and even then, I still don't fully understand some of them. Particularly: vop_coded: If I'm reading the decoder correctly, this flag will always be 1 as the decoder will not pass the hwaccel any frame where it is not 1. divx_flags: There's obviously no documentation on what the possible flags are. I simply observed that this is '0' for a normal bitstream and '5' for packed b-frames. gmc_enabled: I had a number of guesses as to what this mapped to. I picked the condition I did based on when the cuvid parser was setting flag. Also note that as with the vdpau hwaccel, the decoder needs to consume the entire frame and not the slice. 
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6b77a10e43e1a8ed8ead20f344485d400440055c --- Changelog| 2 +- configure| 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/h263dec.c | 3 ++ libavcodec/nvdec.c | 1 + libavcodec/nvdec_mpeg4.c | 121 +++ libavcodec/version.h | 2 +- 8 files changed, 131 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index 5a9d183aed..74ed35cfe6 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX encoder and decoder - Raw aptX muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2, VC1 and VP9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel decoding - Intel QSV-accelerated overlay filter - mcompand audio filter - acontrast audio filter diff --git a/configure b/configure index 7ac9a8d391..25c3124ca6 100755 --- a/configure +++ b/configure @@ -2735,6 +2735,8 @@ mpeg2_xvmc_hwaccel_select="mpeg2video_decoder" mpeg4_cuvid_hwaccel_select="mpeg4_cuvid_decoder" mpeg4_mediacodec_hwaccel_deps="mediacodec" mpeg4_mmal_hwaccel_deps="mmal" +mpeg4_nvdec_hwaccel_deps="nvdec" +mpeg4_nvdec_hwaccel_select="mpeg4_decoder" mpeg4_vaapi_hwaccel_deps="vaapi" mpeg4_vaapi_hwaccel_select="mpeg4_decoder" mpeg4_vdpau_hwaccel_deps="vdpau" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 0573454c7b..2af957ab72 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -861,6 +861,7 @@ OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)+= vaapi_mpeg2.o OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)+= vdpau_mpeg12.o OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL) += mpegvideo_xvmc.o +OBJS-$(CONFIG_MPEG4_NVDEC_HWACCEL)+= nvdec_mpeg4.o OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)+= vaapi_mpeg4.o OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)+= vdpau_mpeg4.o OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o diff --git a/libavcodec/allcodecs.c 
b/libavcodec/allcodecs.c index e9df7049de..85c38c83aa 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -106,6 +106,7 @@ static void register_all(void) REGISTER_HWACCEL(MPEG4_CUVID, mpeg4_cuvid); REGISTER_HWACCEL(MPEG4_MEDIACODEC, mpeg4_mediacodec); REGISTER_HWACCEL(MPEG4_MMAL,mpeg4_mmal); +REGISTER_HWACCEL(MPEG4_NVDEC, mpeg4_nvdec); REGISTER_HWACCEL(MPEG4_VAAPI, mpeg4_vaapi); REGISTER_HWACCEL(MPEG4_VDPAU, mpeg4_vdpau); REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox); diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index c7cf4bc0c2..b222de793b 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -714,6 +714,9 @@ const enum AVPixelFormat ff_h263_hwaccel_pixfmt_list_420[] = { #if CONFIG_H263_VAAPI_HWACCEL || CONFIG_MPEG4_VAAPI_HWACCEL AV_PIX_FMT_VAAPI, #endif +#if CONFIG_MPEG4_NVDEC_HWACCEL +AV_PIX_FMT_CUDA, +#endif #if CONFIG_MPEG4_VDPAU_HWACCEL AV_PIX_FMT_VDPAU, #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index d5cf1058cb..efcd47a7f7 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -56,6 +56,7 @@ static int map_avcodec_id(enum AVCodecID id) case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1; case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2; +case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4; case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1; case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9; case AV_CODEC_ID_WMV3: return cudaVi
[FFmpeg-cvslog] avcodec: Fix reference data type for nvdec vc1 hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Thu Nov 16 07:31:58 2017 -0800| [5a0f6b099f3e8fcb95a80e3ffe52b3bf369efe24] | committer: Philip Langdale avcodec: Fix reference data type for nvdec vc1 hwaccel I took the reference lookup code from the vp9 hwaccel where the type is unsigned char, but for vc1, the type is signed int. This is particularly important because the value used when there's no reference is different (255 vs -1). It didn't seem to break anything, but for mpeg1/2/4, this mistake caused decode errors. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5a0f6b099f3e8fcb95a80e3ffe52b3bf369efe24 --- libavcodec/nvdec_vc1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c index cf75ba5aca..588a5b9d07 100644 --- a/libavcodec/nvdec_vc1.c +++ b/libavcodec/nvdec_vc1.c @@ -25,13 +25,13 @@ #include "decode.h" #include "vc1.h" -static unsigned char get_ref_idx(AVFrame *frame) +static int get_ref_idx(AVFrame *frame) { FrameDecodeData *fdd; NVDECFrame *cf; if (!frame || !frame->private_ref) -return 255; +return -1; fdd = (FrameDecodeData*)frame->private_ref->data; cf = (NVDECFrame*)fdd->hwaccel_priv; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec: Implement mpeg2 nvdec hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Thu Nov 16 07:35:17 2017 -0800| [7c9f739d864c0ed8f1b433d6a7d9f674edda9cf5] | committer: Philip Langdale avcodec: Implement mpeg2 nvdec hwaccel This is mostly straight-forward. The weird part is that it should just work for mpeg1, but I see corruption in my test cases, so I'm going to try and fix that separately. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7c9f739d864c0ed8f1b433d6a7d9f674edda9cf5 --- Changelog | 2 +- configure | 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c| 1 + libavcodec/mpeg12dec.c| 3 + libavcodec/nvdec.c| 11 ++-- libavcodec/nvdec_mpeg12.c | 152 ++ libavcodec/version.h | 2 +- 8 files changed, 167 insertions(+), 7 deletions(-) diff --git a/Changelog b/Changelog index 119ab678e5..cda59166fc 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX encoder and decoder - Raw aptX muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC, VC1 and VP9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-2, VC1 and VP9 hwaccel decoding - Intel QSV-accelerated overlay filter - mcompand audio filter diff --git a/configure b/configure index c8e2e35192..8b7b7e164b 100755 --- a/configure +++ b/configure @@ -2713,6 +2713,8 @@ mpeg2_dxva2_hwaccel_deps="dxva2" mpeg2_dxva2_hwaccel_select="mpeg2video_decoder" mpeg2_mediacodec_hwaccel_deps="mediacodec" mpeg2_mmal_hwaccel_deps="mmal" +mpeg2_nvdec_hwaccel_deps="nvdec" +mpeg2_nvdec_hwaccel_select="mpeg2video_decoder" mpeg2_qsv_hwaccel_deps="libmfx" mpeg2_vaapi_hwaccel_deps="vaapi" mpeg2_vaapi_hwaccel_select="mpeg2video_decoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 6315672573..494c76da76 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -854,6 +854,7 @@ OBJS-$(CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_MPEG1_XVMC_HWACCEL) += mpegvideo_xvmc.o OBJS-$(CONFIG_MPEG2_D3D11VA_HWACCEL) += 
dxva2_mpeg2.o OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)+= dxva2_mpeg2.o +OBJS-$(CONFIG_MPEG2_NVDEC_HWACCEL)+= nvdec_mpeg12.o OBJS-$(CONFIG_MPEG2_QSV_HWACCEL) += qsvdec_other.o OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)+= vaapi_mpeg2.o OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)+= vdpau_mpeg12.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index e213f3757c..e0adb71951 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -96,6 +96,7 @@ static void register_all(void) REGISTER_HWACCEL(MPEG2_D3D11VA2,mpeg2_d3d11va2); REGISTER_HWACCEL(MPEG2_DXVA2, mpeg2_dxva2); REGISTER_HWACCEL(MPEG2_MMAL,mpeg2_mmal); +REGISTER_HWACCEL(MPEG2_NVDEC, mpeg2_nvdec); REGISTER_HWACCEL(MPEG2_QSV, mpeg2_qsv); REGISTER_HWACCEL(MPEG2_VAAPI, mpeg2_vaapi); REGISTER_HWACCEL(MPEG2_VDPAU, mpeg2_vdpau); diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index d5bc5f21b2..2b213eebcd 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -1141,6 +1141,9 @@ static const enum AVPixelFormat mpeg1_hwaccel_pixfmt_list_420[] = { }; static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = { +#if CONFIG_MPEG2_NVDEC_HWACCEL +AV_PIX_FMT_CUDA, +#endif #if CONFIG_MPEG2_XVMC_HWACCEL AV_PIX_FMT_XVMC, #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 20d7c3db27..3d62840e9f 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -52,11 +52,12 @@ typedef struct NVDECFramePool { static int map_avcodec_id(enum AVCodecID id) { switch (id) { -case AV_CODEC_ID_H264: return cudaVideoCodec_H264; -case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; -case AV_CODEC_ID_VC1: return cudaVideoCodec_VC1; -case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9; -case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1; +case AV_CODEC_ID_H264: return cudaVideoCodec_H264; +case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; +case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2; +case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1; +case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9; +case 
AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1; } return -1; } diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c new file mode 100644 index 00..127e843d85 --- /dev/null +++ b/libavcodec/nvdec_mpeg12.c @@ -0,0 +1,152 @@ +/* + * MPEG-2 HW decode acceleration through NVDEC + * + * Copyright (c) 2017 Philip Langdale + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as p
[FFmpeg-cvslog] avcodec: Implement vc1 nvdec hwaccel
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Mon Nov 13 21:09:27 2017 -0800| [912ceba61b0d45caa8ba8664ddf7b18e2121ddf3] | committer: Philip Langdale avcodec: Implement vc1 nvdec hwaccel This hwaccel is interesting because it also works for wmv3/9 content, which is not supported by the nvidia parser used by cuviddec. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=912ceba61b0d45caa8ba8664ddf7b18e2121ddf3 --- Changelog | 2 +- configure | 3 + libavcodec/Makefile| 1 + libavcodec/allcodecs.c | 2 + libavcodec/nvdec.c | 2 + libavcodec/nvdec_vc1.c | 184 + libavcodec/vc1dec.c| 3 + libavcodec/version.h | 2 +- 8 files changed, 197 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index 68829f22a4..d2b5530ad7 100644 --- a/Changelog +++ b/Changelog @@ -13,7 +13,7 @@ version : - PCE support for extended channel layouts in the AAC encoder - native aptX encoder and decoder - Raw aptX muxer and demuxer -- NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC, VC1 and VP9 hwaccel decoding - Intel QSV-accelerated overlay filter diff --git a/configure b/configure index 3788f26956..934ac3abfd 100755 --- a/configure +++ b/configure @@ -2740,6 +2740,8 @@ vc1_d3d11va2_hwaccel_select="vc1_decoder" vc1_dxva2_hwaccel_deps="dxva2" vc1_dxva2_hwaccel_select="vc1_decoder" vc1_mmal_hwaccel_deps="mmal" +vc1_nvdec_hwaccel_deps="nvdec" +vc1_nvdec_hwaccel_select="vc1_decoder" vc1_qsv_hwaccel_deps="libmfx" vc1_vaapi_hwaccel_deps="vaapi" vc1_vaapi_hwaccel_select="vc1_decoder" @@ -2763,6 +2765,7 @@ vp9_vaapi_hwaccel_select="vp9_decoder" wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel" wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel" wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel" +wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel" wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel" wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2476aecc40..6315672573 100644 --- 
a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -864,6 +864,7 @@ OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)+= vdpau_mpeg4.o OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL)+= dxva2_vc1.o OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o +OBJS-$(CONFIG_VC1_NVDEC_HWACCEL) += nvdec_vc1.o OBJS-$(CONFIG_VC1_QSV_HWACCEL)+= qsvdec_other.o OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0781862de5..e213f3757c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -111,6 +111,7 @@ static void register_all(void) REGISTER_HWACCEL(VC1_D3D11VA, vc1_d3d11va); REGISTER_HWACCEL(VC1_D3D11VA2, vc1_d3d11va2); REGISTER_HWACCEL(VC1_DXVA2, vc1_dxva2); +REGISTER_HWACCEL(VC1_NVDEC, vc1_nvdec); REGISTER_HWACCEL(VC1_VAAPI, vc1_vaapi); REGISTER_HWACCEL(VC1_VDPAU, vc1_vdpau); REGISTER_HWACCEL(VC1_MMAL, vc1_mmal); @@ -128,6 +129,7 @@ static void register_all(void) REGISTER_HWACCEL(WMV3_D3D11VA, wmv3_d3d11va); REGISTER_HWACCEL(WMV3_D3D11VA2, wmv3_d3d11va2); REGISTER_HWACCEL(WMV3_DXVA2,wmv3_dxva2); +REGISTER_HWACCEL(WMV3_NVDEC,wmv3_nvdec); REGISTER_HWACCEL(WMV3_VAAPI,wmv3_vaapi); REGISTER_HWACCEL(WMV3_VDPAU,wmv3_vdpau); diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index ac68faca99..20d7c3db27 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -54,7 +54,9 @@ static int map_avcodec_id(enum AVCodecID id) switch (id) { case AV_CODEC_ID_H264: return cudaVideoCodec_H264; case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC; +case AV_CODEC_ID_VC1: return cudaVideoCodec_VC1; case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9; +case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1; } return -1; } diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c new file mode 100644 index 00..cf75ba5aca --- /dev/null +++ b/libavcodec/nvdec_vc1.c @@ -0,0 +1,184 @@ +/* + * VC1 HW decode acceleration through NVDEC + * + * Copyright (c) 2017 
Philip Langdale + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; w
[FFmpeg-cvslog] avcodec: Don't assume separate u and v planes in ff_alloc_picture
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Mon Nov 13 21:11:27 2017 -0800| [bb4c9d0a8ead02f7d943c2bae3e36b30e605b30b] | committer: Philip Langdale avcodec: Don't assume separate u and v planes in ff_alloc_picture alloc_frame_buffer in ff_alloc_picture asserts that the linesize of planes 1 and 2 are the same. If the pixfmt has a single uv plane, like NV12, this won't be true. So, let's only do this check if there are more than 2 planes. We never hit this with previous hw formats because they don't set linesize to meaningful values, but the cuda hw format sets the values based on the underlying data layout. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bb4c9d0a8ead02f7d943c2bae3e36b30e605b30b --- libavcodec/mpegpicture.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c index 9811a778b7..2b72346fb2 100644 --- a/libavcodec/mpegpicture.c +++ b/libavcodec/mpegpicture.c @@ -22,6 +22,7 @@ #include "libavutil/avassert.h" #include "libavutil/common.h" +#include "libavutil/pixdesc.h" #include "avcodec.h" #include "motion_est.h" @@ -155,7 +156,8 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic, return -1; } -if (pic->f->linesize[1] != pic->f->linesize[2]) { +if (av_pix_fmt_count_planes(pic->f->format) > 2 && +pic->f->linesize[1] != pic->f->linesize[2]) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n"); ff_mpeg_unref_picture(avctx, pic); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/crystalhd: Explicitly set frame pts at all times
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Apr 23 14:34:03 2017 -0700| [181aa1be493416c38c8adf0ce21e7fdac64a039e] | committer: Philip Langdale avcodec/crystalhd: Explicitly set frame pts at all times Previously, the pts value was initialised to AV_NOPTS_VALUE and so it was not necessary to always set it. Now, with the new-new decode API, this is no longer true. I'm not sure why I avoided setting the pts when the decoder value was also AV_NOPTS_VALUE - it clearly wouldn't have changed anything previously, but here we are. Failing to do this, means the frame pts will be some random uninitalised value. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=181aa1be493416c38c8adf0ce21e7fdac64a039e --- libavcodec/crystalhd.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 8956ca4bfb..a22dfe16ea 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -537,14 +537,13 @@ static inline CopyRet copy_frame(AVCodecContext *avctx, if (interlaced) frame->top_field_first = !bottom_first; -if (pkt_pts != AV_NOPTS_VALUE) { -frame->pts = pkt_pts; +frame->pts = pkt_pts; #if FF_API_PKT_PTS FF_DISABLE_DEPRECATION_WARNINGS -frame->pkt_pts = pkt_pts; +frame->pkt_pts = pkt_pts; FF_ENABLE_DEPRECATION_WARNINGS #endif -} + frame->pkt_pos = -1; frame->pkt_duration = 0; frame->pkt_size = -1; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/crystalhd: Another attempt at using mpeg4_unpack_bframes bsf
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Apr 23 15:31:55 2017 -0700| [dd49eff93095110d2e878bbcc81b0062590d865f] | committer: Philip Langdale avcodec/crystalhd: Another attempt at using mpeg4_unpack_bframes bsf I tried doing this before, but it resulted in weird behaviour with certain samples. I want to say I think I've got it sorted out now, and the new autobsf stuff makes it trivial to turn on. The native support for packed bframes is buggy and I think buggy in ways beyond what I already try to account for, so this should be a net improvements. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dd49eff93095110d2e878bbcc81b0062590d865f --- libavcodec/crystalhd.c | 28 +--- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index a22dfe16ea..83bc8bf364 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -101,7 +101,6 @@ typedef struct { /* Options */ uint32_t sWidth; -uint8_t bframe_bug; } CHDContext; static const AVOption options[] = { @@ -631,16 +630,6 @@ static inline CopyRet receive_frame(AVCodecContext *avctx, } else if (ret == BC_STS_SUCCESS) { int copy_ret = -1; if (output.PoutFlags & BC_POUT_FLAGS_PIB_VALID) { -if (avctx->codec->id == AV_CODEC_ID_MPEG4 && -output.PicInfo.timeStamp == 0 && priv->bframe_bug) { -if (!priv->bframe_bug) { -av_log(avctx, AV_LOG_VERBOSE, - "CrystalHD: Not returning packed frame twice.\n"); -} -DtsReleaseOutputBuffs(dev, NULL, FALSE); -return RET_COPY_AGAIN; -} - print_frame_info(priv, ); copy_ret = copy_frame(avctx, , frame, got_frame); @@ -675,21 +664,6 @@ static int crystalhd_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) if (avpkt && avpkt->size) { uint64_t pts; -if (!priv->bframe_bug && (avpkt->size == 6 || avpkt->size == 7)) { -/* - * Drop frames trigger the bug - */ -av_log(avctx, AV_LOG_WARNING, - "CrystalHD: Enabling work-around for packed b-frame bug\n"); -priv->bframe_bug = 1; -} else if 
(priv->bframe_bug && avpkt->size == 8) { -/* - * Delay frames don't trigger the bug - */ -av_log(avctx, AV_LOG_WARNING, - "CrystalHD: Disabling work-around for packed b-frame bug\n"); -priv->bframe_bug = 0; -} /* * Despite being notionally opaque, either libcrystalhd or @@ -825,7 +799,7 @@ DEFINE_CRYSTALHD_DECODER(mpeg2, MPEG2VIDEO, NULL) #endif #if CONFIG_MPEG4_CRYSTALHD_DECODER -DEFINE_CRYSTALHD_DECODER(mpeg4, MPEG4, NULL) +DEFINE_CRYSTALHD_DECODER(mpeg4, MPEG4, "mpeg4_unpack_bframes") #endif #if CONFIG_MSMPEG4_CRYSTALHD_DECODER ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/movtextenc: Ignore unmatched closing style tags
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Apr 23 10:42:25 2017 -0700| [f95c81ce104554b6860d94724a681a1bac0c4fbd] | committer: Philip Langdale avcodec/movtextenc: Ignore unmatched closing style tags The existing code will segfault if a closing tag shows up when there was never an opening tag. This isn't a well formed style, but it's also not a reason to crash. Fixes: https://trac.ffmpeg.org/ticket/6303 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f95c81ce104554b6860d94724a681a1bac0c4fbd --- libavcodec/movtextenc.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c index 20e01e206e..d795e317c3 100644 --- a/libavcodec/movtextenc.c +++ b/libavcodec/movtextenc.c @@ -57,6 +57,8 @@ typedef struct { } HilightcolorBox; typedef struct { +AVCodecContext *avctx; + ASSSplitContext *ass_ctx; AVBPrint buffer; StyleBox **style_attributes; @@ -187,6 +189,7 @@ static av_cold int mov_text_encode_init(AVCodecContext *avctx) }; MovTextContext *s = avctx->priv_data; +s->avctx = avctx; avctx->extradata_size = sizeof text_sample_entry; avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); @@ -247,6 +250,9 @@ static void mov_text_style_cb(void *priv, const char style, int close) s->style_attributes_temp->style_flag |= STYLE_FLAG_UNDERLINE; break; } +} else if (!s->style_attributes_temp) { +av_log(s->avctx, AV_LOG_WARNING, "Ignoring unmatched close tag\n"); +return; } else { s->style_attributes_temp->style_end = AV_RB16(>text_pos); av_dynarray_add(>style_attributes, >count, s->style_attributes_temp); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/crystalhd: Switch to the new generic filtering mechanism
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sat Apr 22 20:03:27 2017 -0700| [41b0561dc7246b72a834067da539ae98b1ec6631] | committer: Philip Langdale avcodec/crystalhd: Switch to the new generic filtering mechanism This lets us drop all the code for handling the mp4toannexb conversion. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41b0561dc7246b72a834067da539ae98b1ec6631 --- libavcodec/crystalhd.c | 134 - 1 file changed, 9 insertions(+), 125 deletions(-) diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 8ccd6ff8bd..8956ca4bfb 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -92,15 +92,7 @@ typedef struct { AVCodecContext *avctx; HANDLE dev; -uint8_t *orig_extradata; -uint32_t orig_extradata_size; - -AVBSFContext *bsfc; - uint8_t is_70012; -uint8_t *sps_pps_buf; -uint32_t sps_pps_size; -uint8_t is_nal; uint8_t need_second_field; uint8_t draining; @@ -140,7 +132,7 @@ static inline BC_MEDIA_SUBTYPE id2subtype(CHDContext *priv, enum AVCodecID id) case AV_CODEC_ID_WMV3: return BC_MSUBTYPE_WMV3; case AV_CODEC_ID_H264: -return priv->is_nal ? BC_MSUBTYPE_AVC1 : BC_MSUBTYPE_H264; +return BC_MSUBTYPE_H264; default: return BC_MSUBTYPE_INVALID; } @@ -295,25 +287,6 @@ static av_cold int uninit(AVCodecContext *avctx) DtsCloseDecoder(device); DtsDeviceClose(device); -/* - * Restore original extradata, so that if the decoder is - * reinitialised, the bitstream detection and filtering - * will work as expected. 
- */ -if (priv->orig_extradata) { -av_free(avctx->extradata); -avctx->extradata = priv->orig_extradata; -avctx->extradata_size = priv->orig_extradata_size; -priv->orig_extradata = NULL; -priv->orig_extradata_size = 0; -} - -if (priv->bsfc) { -av_bsf_free(>bsfc); -} - -av_freep(>sps_pps_buf); - if (priv->head) { OpaqueList *node = priv->head; while (node) { @@ -326,60 +299,9 @@ static av_cold int uninit(AVCodecContext *avctx) return 0; } - -static av_cold int init_bsf(AVCodecContext *avctx, const char *bsf_name) -{ -CHDContext *priv = avctx->priv_data; -const AVBitStreamFilter *bsf; -int avret; -void *extradata = NULL; -size_t size = 0; - -bsf = av_bsf_get_by_name(bsf_name); -if (!bsf) { -av_log(avctx, AV_LOG_ERROR, - "Cannot open the %s BSF!\n", bsf_name); -return AVERROR_BSF_NOT_FOUND; -} - -avret = av_bsf_alloc(bsf, >bsfc); -if (avret != 0) { -return avret; -} - -avret = avcodec_parameters_from_context(priv->bsfc->par_in, avctx); -if (avret != 0) { -return avret; -} - -avret = av_bsf_init(priv->bsfc); -if (avret != 0) { -return avret; -} - -/* Back up the extradata so it can be restored at close time. 
*/ -priv->orig_extradata = avctx->extradata; -priv->orig_extradata_size = avctx->extradata_size; - -size = priv->bsfc->par_out->extradata_size; -extradata = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE); -if (!extradata) { -av_log(avctx, AV_LOG_ERROR, - "Failed to allocate copy of extradata\n"); -return AVERROR(ENOMEM); -} -memcpy(extradata, priv->bsfc->par_out->extradata, size); - -avctx->extradata = extradata; -avctx->extradata_size = size; - -return 0; -} - static av_cold int init(AVCodecContext *avctx) { CHDContext* priv; -int avret; BC_STATUS ret; BC_INFO_CRYSTAL version; BC_INPUT_FORMAT format = { @@ -407,21 +329,10 @@ static av_cold int init(AVCodecContext *avctx) /* Initialize the library */ priv = avctx->priv_data; priv->avctx= avctx; -priv->is_nal = avctx->extradata_size > 0 && *(avctx->extradata) == 1; priv->draining = 0; subtype = id2subtype(priv, avctx->codec->id); switch (subtype) { -case BC_MSUBTYPE_AVC1: -avret = init_bsf(avctx, "h264_mp4toannexb"); -if (avret != 0) { -return avret; -} -subtype = BC_MSUBTYPE_H264; -format.startCodeSz = 4; -format.pMetaData = avctx->extradata; -format.metaDataSz = avctx->extradata_size; -break; case BC_MSUBTYPE_H264: format.startCodeSz = 4; // Fall-through @@ -781,34 +692,6 @@ static int crystalhd_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) priv->bframe_bug = 0; } -if (priv->bsfc) { -AVPacket filter_packet = { 0 }; - -ret = av_packet_ref(_packet, avpkt); -
[FFmpeg-cvslog] avcodec/crystalhd: Adapt to new new decode API
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sat Apr 22 12:01:52 2017 -0700| [3148387086ade31af214a72aa1610e2d8f68f0a0] | committer: James Almer avcodec/crystalhd: Adapt to new new decode API The new new decode API requires the decoder to ask for the next input packet, and it cannot just return EAGAIN if that packet cannot be processed yet. This means we must finally confront how we get this decoder to block when the input buffer is full and no output frames are ready yet. In the end, that isn't too hard to achieve - the main trick seems to be that you have to aggressively poll the hardware - it doesn't seem to make any forward progress if you sleep. Signed-off-by: James Almer <jamr...@gmail.com> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3148387086ade31af214a72aa1610e2d8f68f0a0 --- libavcodec/crystalhd.c | 89 ++ 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 630d02b6f5..b1e5c64829 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -55,6 +55,7 @@ #include #include "avcodec.h" +#include "decode.h" #include "internal.h" #include "libavutil/imgutils.h" #include "libavutil/intreadwrite.h" @@ -763,8 +764,7 @@ static int crystalhd_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: decode_packet\n"); if (avpkt && avpkt->size) { -int32_t tx_free = (int32_t)DtsTxFreeSize(dev); - +uint64_t pts; if (!priv->bframe_bug && (avpkt->size == 6 || avpkt->size == 7)) { /* * Drop frames trigger the bug @@ -809,39 +809,33 @@ static int crystalhd_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) av_packet_unref(_packet); } -if (avpkt->size < tx_free) { -/* - * Despite being notionally opaque, either libcrystalhd or - * the hardware itself will mangle pts values that are too - * small or too large. The docs claim it should be in units - * of 100ns. 
Given that we're nominally dealing with a black - * box on both sides, any transform we do has no guarantee of - * avoiding mangling so we need to build a mapping to values - * we know will not be mangled. - */ -uint64_t pts = opaque_list_push(priv, avpkt->pts); -if (!pts) { -ret = AVERROR(ENOMEM); -goto exit; -} -av_log(priv->avctx, AV_LOG_VERBOSE, - "input \"pts\": %"PRIu64"\n", pts); -bc_ret = DtsProcInput(dev, avpkt->data, avpkt->size, pts, 0); -if (bc_ret == BC_STS_BUSY) { -av_log(avctx, AV_LOG_WARNING, - "CrystalHD: ProcInput returned busy\n"); -ret = AVERROR(EAGAIN); -goto exit; -} else if (bc_ret != BC_STS_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, - "CrystalHD: ProcInput failed: %u\n", ret); -ret = -1; -goto exit; -} -} else { -av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Input buffer full\n"); +/* + * Despite being notionally opaque, either libcrystalhd or + * the hardware itself will mangle pts values that are too + * small or too large. The docs claim it should be in units + * of 100ns. Given that we're nominally dealing with a black + * box on both sides, any transform we do has no guarantee of + * avoiding mangling so we need to build a mapping to values + * we know will not be mangled. + */ +pts = opaque_list_push(priv, avpkt->pts); +if (!pts) { +ret = AVERROR(ENOMEM); +goto exit; +} +av_log(priv->avctx, AV_LOG_VERBOSE, + "input \"pts\": %"PRIu64"\n", pts); +bc_ret = DtsProcInput(dev, avpkt->data, avpkt->size, pts, 0); +if (bc_ret == BC_STS_BUSY) { +av_log(avctx, AV_LOG_WARNING, + "CrystalHD: ProcInput returned busy\n"); ret = AVERROR(EAGAIN); goto exit; +} else if (bc_ret != BC_STS_SUCCESS) { +av_log(avctx, AV_LOG_ERROR, + "CrystalHD: ProcInput failed: %u\n", ret); +ret = -1; +goto exit; } } else { av_log(avctx, AV_LOG_INFO, "CrystalHD: No more input data\n"); @@ -862,9 +856,35 @@ static int crystalhd_receive_frame(AVCodecContext *avctx, AVFrame *frame) CHDContext *priv = avct
[FFmpeg-cvslog] swscale: add P016 input support
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 20 14:32:49 2016 -0800| [4c2176d45be1a7fbbcdf1f3d01b1ba2bab6f8d0f] | committer: Philip Langdale swscale: add P016 input support > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4c2176d45be1a7fbbcdf1f3d01b1ba2bab6f8d0f --- libswscale/input.c| 32 libswscale/swscale_unscaled.c | 4 +++- libswscale/utils.c| 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/libswscale/input.c b/libswscale/input.c index 1f4ea18..8b5f348 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -719,6 +719,28 @@ static void p010BEToUV_c(uint8_t *dstU, uint8_t *dstV, } } +static void p016LEToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ +int i; +for (i = 0; i < width; i++) { +AV_WN16(dstU + i * 2, AV_RL16(src1 + i * 4 + 0)); +AV_WN16(dstV + i * 2, AV_RL16(src1 + i * 4 + 2)); +} +} + +static void p016BEToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ +int i; +for (i = 0; i < width; i++) { +AV_WN16(dstU + i * 2, AV_RB16(src1 + i * 4 + 0)); +AV_WN16(dstV + i * 2, AV_RB16(src1 + i * 4 + 2)); +} +} + #define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, @@ -1085,6 +1107,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_P010BE: c->chrToYV12 = p010BEToUV_c; break; +case AV_PIX_FMT_P016LE: +c->chrToYV12 = p016LEToUV_c; +break; +case AV_PIX_FMT_P016BE: +c->chrToYV12 = p016BEToUV_c; +break; } if (c->chrSrcHSubSample) { switch (srcFormat) { @@ -1326,6 +1354,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GRAY10LE: case AV_PIX_FMT_GRAY12LE: case AV_PIX_FMT_GRAY16LE: + +case AV_PIX_FMT_P016LE: c->lumToYV12 = bswap16Y_c; break; case AV_PIX_FMT_YUVA444P9LE: @@ -1362,6 +1392,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_GRAY10BE: case AV_PIX_FMT_GRAY12BE: case AV_PIX_FMT_GRAY16BE: + +case AV_PIX_FMT_P016BE: c->lumToYV12 = bswap16Y_c; break; case AV_PIX_FMT_YUVA444P9BE: diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index b2bfc40..ba3d688 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1878,8 +1878,10 @@ void ff_get_unscaled_swscale(SwsContext *c) c->chrDstVSubSample == c->chrSrcVSubSample && dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 && dstFormat != AV_PIX_FMT_P010LE && dstFormat != AV_PIX_FMT_P010BE && + dstFormat != AV_PIX_FMT_P016LE && dstFormat != AV_PIX_FMT_P016BE && srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21 && - srcFormat != AV_PIX_FMT_P010LE && srcFormat != AV_PIX_FMT_P010BE)) + srcFormat != AV_PIX_FMT_P010LE && srcFormat != AV_PIX_FMT_P010BE && + srcFormat != AV_PIX_FMT_P016LE && srcFormat != AV_PIX_FMT_P016BE)) { if (isPacked(c->srcFormat)) c->swscale = packedCopyWrapper; diff --git a/libswscale/utils.c b/libswscale/utils.c index 60a8e55..caae63a 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -252,6 +252,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = { [AV_PIX_FMT_AYUV64LE]= { 1, 1}, 
[AV_PIX_FMT_P010LE] = { 1, 1 }, [AV_PIX_FMT_P010BE] = { 1, 1 }, +[AV_PIX_FMT_P016LE] = { 1, 0 }, +[AV_PIX_FMT_P016BE] = { 1, 0 }, }; int sws_isSupportedInput(enum AVPixelFormat pix_fmt) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/crystalhd: Handle errors from av_image_get_linesize
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 30 16:03:44 2016 -0800| [5512dbe37f83d8b11393c7059c6eae48d164461c] | committer: Philip Langdale avcodec/crystalhd: Handle errors from av_image_get_linesize This function can return an error in certain situations. Fixes Coverity CID 703707. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5512dbe37f83d8b11393c7059c6eae48d164461c --- libavcodec/crystalhd.c | 5 + 1 file changed, 5 insertions(+) diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index d85e351..630d02b 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -568,6 +568,9 @@ static inline CopyRet copy_frame(AVCodecContext *avctx, } bwidth = av_image_get_linesize(avctx->pix_fmt, width, 0); +if (bwidth < 0) + return RET_ERROR; + if (priv->is_70012) { int pStride; @@ -577,6 +580,8 @@ static inline CopyRet copy_frame(AVCodecContext *avctx, pStride = 1280; else pStride = 1920; sStride = av_image_get_linesize(avctx->pix_fmt, pStride, 0); +if (sStride < 0) +return RET_ERROR; } else { sStride = bwidth; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/vdpau_hevc: Fix potential out-of-bounds write
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 30 16:13:14 2016 -0800| [4e6d1c1f4ec83000a067ff14452b34c1f2d2a43a] | committer: Philip Langdale avcodec/vdpau_hevc: Fix potential out-of-bounds write The maximum number of references is 16, so the index value cannot exceed 15. Fixes Coverity CID 1348139, 1348140, 1348141 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4e6d1c1f4ec83000a067ff14452b34c1f2d2a43a --- libavcodec/vdpau_hevc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vdpau_hevc.c b/libavcodec/vdpau_hevc.c index 03c61dc..ce2610f 100644 --- a/libavcodec/vdpau_hevc.c +++ b/libavcodec/vdpau_hevc.c @@ -234,7 +234,7 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, const HEVCFrame *frame = >DPB[i]; if (frame != h->ref && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { -if (j > 16) { +if (j > 15) { av_log(avctx, AV_LOG_WARNING, "VDPAU only supports up to 16 references in the DPB. " "This frame may not be decoded correctly.\n"); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] tools/coverity: Add model for av_realloc
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 30 14:50:36 2016 -0800| [fdb124001e9adb12e5c27cc0a9e2982f46445bf7] | committer: Philip Langdale tools/coverity: Add model for av_realloc Really should have done this last time. It should provide consistency across our allocations and frees. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fdb124001e9adb12e5c27cc0a9e2982f46445bf7 --- tools/coverity.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/tools/coverity.c b/tools/coverity.c index 3cc248c..19a132a 100644 --- a/tools/coverity.c +++ b/tools/coverity.c @@ -58,7 +58,22 @@ void *av_mallocz(size_t size) { } } +void *av_realloc(void *ptr, size_t size) { +int has_memory; +__coverity_negative_sink__(size); +if (has_memory) { +__coverity_escape__(ptr); +ptr = __coverity_alloc__(size); +__coverity_writeall__(ptr); +__coverity_mark_as_afm_allocated__(ptr, "av_free"); +return ptr; +} else { +return 0; +} +} + void *av_free(void *ptr) { __coverity_free__(ptr); __coverity_mark_as_afm_freed__(ptr, "av_free"); } + ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] tools/coverity: Add models for av_mallocz and av_free
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 27 11:16:18 2016 -0800| [5eb68520635d895bbd878abf29fdb66872cbe00e] | committer: Philip Langdale tools/coverity: Add models for av_mallocz and av_free This should deal with some false positives, but might lead to more of them depending on whether it realises that av_freep() wraps av_free() or not. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5eb68520635d895bbd878abf29fdb66872cbe00e --- tools/coverity.c | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/coverity.c b/tools/coverity.c index 80fc1c2..3cc248c 100644 --- a/tools/coverity.c +++ b/tools/coverity.c @@ -35,8 +35,30 @@ void *av_malloc(size_t size) { int has_memory; __coverity_negative_sink__(size); -if(has_memory) -return __coverity_alloc__(size); -else +if (has_memory) { +void *ptr = __coverity_alloc__(size); +__coverity_mark_as_uninitialized_buffer__(ptr); +__coverity_mark_as_afm_allocated__(ptr, "av_free"); + return ptr; +} else { return 0; +} +} + +void *av_mallocz(size_t size) { +int has_memory; +__coverity_negative_sink__(size); +if (has_memory) { +void *ptr = __coverity_alloc__(size); +__coverity_writeall0__(ptr); +__coverity_mark_as_afm_allocated__(ptr, "av_free"); +return ptr; +} else { +return 0; +} +} + +void *av_free(void *ptr) { +__coverity_free__(ptr); +__coverity_mark_as_afm_freed__(ptr, "av_free"); } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/nvenc: Remove aspect-ratio decompensation logic
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 23 14:05:18 2016 -0800| [829db8effd76b579ae9aca5ee8f85d3ade6af253] | committer: Philip Langdale avcodec/nvenc: Remove aspect-ratio decompensation logic This dubious behaviour in nvenc was finally removed by nvidia, and as we refuse to run on anything older than 7.0, we don't need to keep it around for old versions. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=829db8effd76b579ae9aca5ee8f85d3ade6af253 --- libavcodec/nvenc.c | 12 1 file changed, 12 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index cd14af2..d24d278 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -953,18 +953,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ctx->init_encode_params.darWidth = avctx->width; } -// De-compensate for hardware, dubiously, trying to compensate for -// playback at 704 pixel width. -if (avctx->width == 720 && -(avctx->height == 480 || avctx->height == 576)) { -av_reduce(, , - ctx->init_encode_params.darWidth * 44, - ctx->init_encode_params.darHeight * 45, - 1024 * 1024); -ctx->init_encode_params.darHeight = dh; -ctx->init_encode_params.darWidth = dw; -} - ctx->init_encode_params.frameRateNum = avctx->time_base.den; ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/cuvid: Restore initialization of pixel format in init()
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Wed Nov 23 12:10:32 2016 -0800| [dd10e7253abf280c603941613a4cc27ca347b76d] | committer: Philip Langdale avcodec/cuvid: Restore initialization of pixel format in init() I moved this into the handle_video_sequence callback because that's the earliest time you can make an accurate decision as to what the format should be. However, transcoding requires that the decision between using the accelerated PIX_FMT_CUDA vs a normal pix format happen at init() time. There is enough information available to make that decision and things work out with the underlying format only being discovered in the sequence callback. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dd10e7253abf280c603941613a4cc27ca347b76d --- libavcodec/cuvid.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index 58a84ae..94606a9 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -664,6 +664,21 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) const AVBitStreamFilter *bsf; int ret = 0; +enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, + AV_PIX_FMT_NV12, + AV_PIX_FMT_NONE }; + +// Accelerated transcoding scenarios with 'ffmpeg' require that the +// pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the +// pix_fmt for non-accelerated transcoding, do not need to be correct +// but need to be set to something. We arbitrarily pick NV12. +ret = ff_get_format(avctx, pix_fmts); +if (ret < 0) { +av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret); +return ret; +} +avctx->pix_fmt = ret; + ret = cuvid_load_functions(&ctx->cvdl); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n"); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avutil: add P016 pixel format
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Sun Nov 20 13:55:49 2016 -0800| [237421f14973a81fe342f7500db176e1c09c58f6] | committer: Philip Langdale avutil: add P016 pixel format P016 is the 16-bit variant of NV12 (planar luma, packed chroma), using two bytes per component. It may, and in fact is most likely to, be used in situations where there are less than 16 bits of data. It is the responsibility of the writer to zero out any unused LSBs. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=237421f14973a81fe342f7500db176e1c09c58f6 --- libavutil/pixdesc.c | 24 libavutil/pixfmt.h | 4 libavutil/version.h | 4 ++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index f8092ef..3b9c45d 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2078,6 +2078,30 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, }, +[AV_PIX_FMT_P016LE] = { +.name = "p016le", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 1, +.comp = { +{ 0, 2, 0, 0, 16, 1, 15, 1 }, /* Y */ +{ 1, 4, 0, 0, 16, 3, 15, 1 }, /* U */ +{ 1, 4, 2, 0, 16, 3, 15, 3 }, /* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR, +}, +[AV_PIX_FMT_P016BE] = { +.name = "p016be", +.nb_components = 3, +.log2_chroma_w = 1, +.log2_chroma_h = 1, +.comp = { +{ 0, 2, 0, 0, 16, 1, 15, 1 }, /* Y */ +{ 1, 4, 0, 0, 16, 3, 15, 1 }, /* U */ +{ 1, 4, 2, 0, 16, 3, 15, 3 }, /* V */ +}, +.flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE, +}, [AV_PIX_FMT_GBRAP12LE] = { .name = "gbrap12le", .nb_components = 4, diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 96860ce..dfb1b11 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -311,6 +311,9 @@ enum AVPixelFormat { AV_PIX_FMT_GRAY10BE, ///<Y, 10bpp, big-endian AV_PIX_FMT_GRAY10LE, ///<Y, 10bpp, little-endian +AV_PIX_FMT_P016LE, ///< like NV12, with 16bpp per component, little-endian +AV_PIX_FMT_P016BE, 
///< like NV12, with 16bpp per component, big-endian + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; @@ -389,6 +392,7 @@ enum AVPixelFormat { #define AV_PIX_FMT_NV20 AV_PIX_FMT_NE(NV20BE, NV20LE) #define AV_PIX_FMT_AYUV64 AV_PIX_FMT_NE(AYUV64BE, AYUV64LE) #define AV_PIX_FMT_P010 AV_PIX_FMT_NE(P010BE, P010LE) +#define AV_PIX_FMT_P016 AV_PIX_FMT_NE(P016BE, P016LE) /** * Chromaticity coordinates of the source primaries. diff --git a/libavutil/version.h b/libavutil/version.h index 45b3c8b..57f221c 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,8 +79,8 @@ */ #define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 40 -#define LIBAVUTIL_VERSION_MICRO 101 +#define LIBAVUTIL_VERSION_MINOR 41 +#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] libavutil/hwcontext_cuda: Support P010 and P016 formats
ffmpeg | branch: master | Philip Langdale <phil...@overt.org> | Tue Nov 22 08:18:31 2016 -0800| [8d6c358ea8ece33551c5c3d489a5dce7992f4137] | committer: Philip Langdale libavutil/hwcontext_cuda: Support P010 and P016 formats CUVID is now capable of returning 10bit and 12bit decoded content in P010/P016. Let's support transferring those formats. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8d6c358ea8ece33551c5c3d489a5dce7992f4137 --- libavutil/hwcontext_cuda.c | 15 ++- libavutil/version.h| 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 30de299..2ebf4bc 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -35,6 +35,8 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, +AV_PIX_FMT_P010, +AV_PIX_FMT_P016, }; static void cuda_buffer_free(void *opaque, uint8_t *data) @@ -111,6 +113,8 @@ static int cuda_frames_init(AVHWFramesContext *ctx) size = aligned_width * ctx->height * 3 / 2; break; case AV_PIX_FMT_YUV444P: +case AV_PIX_FMT_P010: +case AV_PIX_FMT_P016: size = aligned_width * ctx->height * 3; break; } @@ -125,7 +129,14 @@ static int cuda_frames_init(AVHWFramesContext *ctx) static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) { -int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT); +int aligned_width; +int width_in_bytes = ctx->width; + +if (ctx->sw_format == AV_PIX_FMT_P010 || +ctx->sw_format == AV_PIX_FMT_P016) { + width_in_bytes *= 2; +} +aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT); frame->buf[0] = av_buffer_pool_get(ctx->pool); if (!frame->buf[0]) @@ -133,6 +144,8 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) switch (ctx->sw_format) { case AV_PIX_FMT_NV12: +case AV_PIX_FMT_P010: +case AV_PIX_FMT_P016: frame->data[0] = frame->buf[0]->data; frame->data[1] = frame->data[0] + aligned_width * ctx->height; frame->linesize[0] =
aligned_width; diff --git a/libavutil/version.h b/libavutil/version.h index 57f221c..bd8342c 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 55 #define LIBAVUTIL_VERSION_MINOR 41 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog