Patches attached. - Andreas
From 362a2cdad8717c016cd05e8d782260bd1aa0751a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 12 Mar 2025 03:26:09 +0100 Subject: [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb code
Up until now, the encoder processed only one bit at a time. With this patch, it processes eight bits at a time. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/vc2enc.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 4728a48938..1fe973f4cd 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -22,6 +22,7 @@ #include "libavutil/mem.h" #include "libavutil/pixdesc.h" #include "libavutil/opt.h" +#include "libavutil/thread.h" #include "libavutil/version.h" #include "codec_internal.h" #include "dirac.h" @@ -186,22 +187,39 @@ typedef struct VC2EncContext { enum DiracParseCodes last_parse_code; } VC2EncContext; +/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 +static uint16_t interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 +static uint16_t top_interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... x_0 -> 2 * k +static uint8_t golomb_len_tab[256]; + +static av_cold void vc2_init_static_data(void) +{ + interleaved_ue_golomb_tab[1] = 1; + for (unsigned i = 2; i < 256; ++i) { + golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; + interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); + top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); + } +} + static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) { - int i; - int bits = av_log2(++val); - unsigned topbit = 1 << bits; - uint64_t pbits = 0; - - for (i = 0; i < bits; i++) { - topbit >>= 1; - av_assert2(pbits <= UINT64_MAX>>3); - pbits <<= 2; - if (val & topbit) - pbits |= 0x1; + uint64_t pbits = 1; + int bits = 1; + + ++val; + + while (val >> 8) { + pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; + val >>= 8; + bits += 16; } + pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; + bits += golomb_len_tab[val]; - put_bits63(pb, 2 * bits + 1, 
(pbits << 1) | 1); + put_bits63(pb, bits, pbits); } static av_always_inline int count_vc2_ue_uint(uint32_t val) @@ -1003,6 +1021,7 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx) static av_cold int vc2_encode_init(AVCodecContext *avctx) { + static AVOnce init_static_once = AV_ONCE_INIT; Plane *p; SubBand *b; int i, level, o, shift; @@ -1165,6 +1184,8 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) } } + ff_thread_once(&init_static_once, vc2_init_static_data); + return 0; } -- 2.45.2
From 389a64c00bc8244186db1abb25be8dd5ec452df7 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Wed, 12 Mar 2025 03:56:03 +0100 Subject: [PATCH 2/2] avcodec/vc2enc: Avoid excessive inlining There is no reason to inline put_vc2_ue_uint() everywhere; only one call site is actually hot: The one in encode_subband() (which accounts for 35735040 of 35739495 calls to said function in a FATE run). Uninline all the others. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/vc2enc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 1fe973f4cd..d05df64911 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -204,7 +204,7 @@ static av_cold void vc2_init_static_data(void) } } -static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) +static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) { uint64_t pbits = 1; int bits = 1; @@ -222,6 +222,11 @@ static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) put_bits63(pb, bits, pbits); } +static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) +{ + put_vc2_ue_uint_inline(pb, val); +} + static av_always_inline int count_vc2_ue_uint(uint32_t val) { return 2 * av_log2(val + 1) + 1; @@ -545,7 +550,7 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, for (y = top; y < bottom; y++) { for (x = left; x < right; x++) { uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s); - put_vc2_ue_uint(pb, c_abs); + put_vc2_ue_uint_inline(pb, c_abs); if (c_abs) put_bits(pb, 1, coeff[x] < 0); } -- 2.45.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".