On Mon, Apr 21, 2025 at 4:19 AM Andreas Rheinhardt < andreas.rheinha...@outlook.com> wrote:
> Emma Worley: > > Improves compatibility with Resolume products by adding an additional > hashtable > > for DXT color+LUT combinations, and padding the DXT texture to the next > largest > > 16x16 multiple. Produces identical packets to Resolume Alley in manual > tests. > > > > Are the files that this encoder currently produces invalid? > Yes, if they are unaligned. > > > Signed-off-by: Emma Worley <e...@emma.gg> > > --- > > libavcodec/dxvenc.c | 105 ++++++++++++++++++++---------------- > > tests/ref/fate/dxv3enc-dxt1 | 2 +- > > 2 files changed, 60 insertions(+), 47 deletions(-) > > > > diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c > > index c64b4d5f15..737ccfff3e 100644 > > --- a/libavcodec/dxvenc.c > > +++ b/libavcodec/dxvenc.c > > @@ -34,6 +34,11 @@ > > > > #define DXV_HEADER_LENGTH 12 > > > > +/* > > + * Resolume will refuse to display frames that are not padded to 16x16 > pixels. > > + */ > > +#define DXV_ALIGN(x) FFALIGN(x, 16) > > + > > /* > > * DXV uses LZ-like back-references to avoid copying words that have > already > > * appeared in the decompressed stream. Using a simple hash table (HT) > > @@ -60,6 +65,7 @@ typedef struct DXVEncContext { > > > > struct AVHashtableContext *color_ht; > > struct AVHashtableContext *lut_ht; > > + struct AVHashtableContext *combo_ht; > > } DXVEncContext; > > > > /* Converts an index offset value to a 2-bit opcode and pushes it to a > stream. > > @@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) > > DXVEncContext *ctx = avctx->priv_data; > > PutByteContext *pbc = &ctx->pbc; > > void *value; > > - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, > pos = 0, op = 0; > > + uint64_t combo; > > + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, > pos = 0, op = 0; > > > > av_hashtable_clear(ctx->color_ht); > > av_hashtable_clear(ctx->lut_ht); > > + av_hashtable_clear(ctx->combo_ht); > > + > > + av_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos); > > > > bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data)); > > av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos); > > @@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) > > pos++; > > > > while (pos + 2 <= ctx->tex_size / 4) { > > - idx = 0; > > - color_idx = 0; > > - lut_idx = 0; > > + combo = AV_RL64(ctx->tex_data + pos * 4); > > + combo_idx = av_hashtable_get(ctx->combo_ht, &combo, &prev_pos) > ? pos - prev_pos : 0; > > + idx = combo_idx; > > + PUSH_OP(2); > > + if (pos >= LOOKBACK_WORDS) { > > + old_pos = pos - LOOKBACK_WORDS; > > + if (av_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos > * 4, &prev_pos) && prev_pos <= old_pos) > > + av_hashtable_delete(ctx->combo_ht, ctx->tex_data + > old_pos * 4); > > + } > > + av_hashtable_set(ctx->combo_ht, &combo, &pos); > > > > color = AV_RL32(ctx->tex_data + pos * 4); > > - if (av_hashtable_get(ctx->color_ht, &color, &prev_pos)) > > - color_idx = pos - prev_pos; > > - av_hashtable_set(ctx->color_ht, &color, &pos); > > - > > + if (!combo_idx) { > > + idx = av_hashtable_get(ctx->color_ht, &color, &prev_pos) ? > pos - prev_pos : 0; > > + PUSH_OP(2); > > + if (!idx) > > + bytestream2_put_le32(pbc, color); > > + } > > if (pos >= LOOKBACK_WORDS) { > > - uint32_t old_pos = pos - LOOKBACK_WORDS; > > + old_pos = pos - LOOKBACK_WORDS; > > if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos > * 4, &prev_pos) && prev_pos <= old_pos) > > av_hashtable_delete(ctx->color_ht, ctx->tex_data + > old_pos * 4); > > } > > + av_hashtable_set(ctx->color_ht, &color, &pos); > > pos++; > > > > lut = AV_RL32(ctx->tex_data + pos * 4); > > - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - > color_idx) * 4)) { > > - idx = color_idx; > > - } else { > > - idx = 0; > > - if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos)) > > - lut_idx = pos - prev_pos; > > - av_hashtable_set(ctx->lut_ht, &lut, &pos); > > + if (!combo_idx) { > > + idx = av_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos > - prev_pos : 0; > > + PUSH_OP(2); > > + if (!idx) > > + bytestream2_put_le32(pbc, lut); > > } > > if (pos >= LOOKBACK_WORDS) { > > - uint32_t old_pos = pos - LOOKBACK_WORDS; > > + old_pos = pos - LOOKBACK_WORDS; > > if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * > 4, &prev_pos) && prev_pos <= old_pos) > > av_hashtable_delete(ctx->lut_ht, ctx->tex_data + > old_pos * 4); > > } > > + av_hashtable_set(ctx->lut_ht, &lut, &pos); > > pos++; > > - > > - PUSH_OP(2); > > - > > - if (!idx) { > > - idx = color_idx; > > - PUSH_OP(2); > > - if (!idx) > > - bytestream2_put_le32(pbc, color); > > - > > - idx = lut_idx; > > - PUSH_OP(2); > > - if (!idx) > > - bytestream2_put_le32(pbc, lut); > > - } > > } > > > > return 0; > > @@ -172,11 +177,23 @@ static int dxv_encode(AVCodecContext *avctx, > AVPacket *pkt, > > return ret; > > > > if (ctx->enc.tex_funct) { > > + uint8_t *dst_data[4] = {av_mallocz(DXV_ALIGN(avctx->width) * > DXV_ALIGN(avctx->height) * 4), 0, 0, 0}; > > + int dst_linesize[4] = {DXV_ALIGN(avctx->width) * 4, 0, 0, 0}; > > + > > + av_image_copy2( > > + dst_data, > > + dst_linesize, > > + frame->data, > > + frame->linesize, > > + frame->format, > > + avctx->width, > > + avctx->height); > > 1. Unchecked allocation. > 2. The buffer leaks unconditionally. > 3. The copy is definitely unnecessary if the input is already properly > aligned. > 4. Can't we avoid the copy in case the input is not properly aligned? 5. Most of dst_data will be initially zeroed only to be overwritten with > the final value again. This is wasteful. > Is there an idiomatic way to do a partial fill here? > > + > > ctx->enc.tex_data.out = ctx->tex_data; > > - ctx->enc.frame_data.in = frame->data[0]; > > - ctx->enc.stride = frame->linesize[0]; > > - ctx->enc.width = avctx->width; > > - ctx->enc.height = avctx->height; > > + ctx->enc.frame_data.in = dst_data[0]; > > + ctx->enc.stride = dst_linesize[0]; > > + ctx->enc.width = DXV_ALIGN(avctx->width); > > + ctx->enc.height = DXV_ALIGN(avctx->height); > > ff_texturedsp_exec_compress_threads(avctx, &ctx->enc); > > } else { > > /* unimplemented: YCoCg formats */ > > @@ -216,14 +233,6 @@ static av_cold int dxv_init(AVCodecContext *avctx) > > return ret; > > } > > > > - if (avctx->width % TEXTURE_BLOCK_W || avctx->height % > TEXTURE_BLOCK_H) { > > - av_log(avctx, > > - AV_LOG_ERROR, > > - "Video size %dx%d is not multiple of > "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n", > > - avctx->width, avctx->height); > > - return AVERROR_INVALIDDATA; > > - } > > - > > ff_texturedspenc_init(&texdsp); > > > > switch (ctx->tex_fmt) { > > @@ -237,10 +246,10 @@ static av_cold int dxv_init(AVCodecContext *avctx) > > return AVERROR_INVALIDDATA; > > } > > ctx->enc.raw_ratio = 16; > > - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W * > > - avctx->height / TEXTURE_BLOCK_H * > > + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W * > > + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H * > > ctx->enc.tex_ratio; > > - ctx->enc.slice_count = av_clip(avctx->thread_count, 1, > avctx->height / TEXTURE_BLOCK_H); > > + ctx->enc.slice_count = av_clip(avctx->thread_count, 1, > DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H); > > > > ctx->tex_data = av_malloc(ctx->tex_size); > > if (!ctx->tex_data) { > > @@ -251,6 +260,9 @@ static av_cold int dxv_init(AVCodecContext *avctx) > > if (ret < 0) > > return ret; > > ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), > sizeof(uint32_t), LOOKBACK_HT_ELEMS); > > + if (ret < 0) > > + return ret; > > + ret = av_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), > sizeof(uint32_t), LOOKBACK_HT_ELEMS); > > if (ret < 0) > > return ret; > > > > @@ -265,6 +277,7 @@ static av_cold int dxv_close(AVCodecContext *avctx) > > > > av_hashtable_freep(&ctx->color_ht); > > av_hashtable_freep(&ctx->lut_ht); > > + av_hashtable_freep(&ctx->combo_ht); > > > > return 0; > > } > > diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1 > > index 74849a8031..e09000e181 100644 > > --- a/tests/ref/fate/dxv3enc-dxt1 > > +++ b/tests/ref/fate/dxv3enc-dxt1 > > @@ -3,4 +3,4 @@ > > #codec_id 0: dxv > > #dimensions 0: 1920x1080 > > #sar 0: 1/1 > > -0, 0, 0, 1, 76521, 0xed387a5e > > +0, 0, 0, 1, 76190, 0x0e6f0326 > > Is padding the reason for this change or is it the new hashtable? Or both? > Both. For more context, the padding is solely for player compatibility while the new hashtable optimize output file size. With both I see identical packets to the de facto closed source encoder (Resolume Alley) given the same DXT1 texture going into dxv_compress_dxt1. > > - Andreas > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".