PR #20713 opened by my4ng URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20713 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20713.patch
This fixes issues #20540 and #20657 with VAAPI's approach. It combines a P-frame with the following B-frame into a single packet with the latter's order, and emits a tail packet with a show_existing_frame header to show it at the correct PTS, as shown below:

```
I ---> B1 ---> B2 ---> P                 Display Order
I ---> P ---> B1 ---> B2                 Encode Order

(I) ---> (P, B1) ---> (B2) ---> (P')     VAAPI Packets
0/0      1/1          2/2       3/3      PTS/DTS

(I) ---> (P) ---> (B1) ---> (B2)         Vulkan Packets (current)
0/0      3/3      1/1       2/2          PTS/DTS
```

This is still WIP as it has to bypass a few CBS checks, though it does work as intended. Since I am not familiar with the CBS system, comments/contributions to remove the hacks are more than welcome.

>From 794072bf808c1f7811900c21c903d24349711df8 Mon Sep 17 00:00:00 2001
From: Michael Yang <[email protected]>
Date: Wed, 15 Oct 2025 17:43:00 +1100
Subject: [PATCH] libavcodec/vulkan_encode_av1: fix non-monotonic DTS

Combine P-frame with following B-frame into a single packet with the latter's order. Emit a tail packet with a show_existing_frame header to show it at the correct PTS.
--- libavcodec/cbs_av1_syntax_template.c | 22 +++++------ libavcodec/vulkan_encode.c | 55 ++++++++++++++++++++++++---- libavcodec/vulkan_encode.h | 9 ++++- libavcodec/vulkan_encode_av1.c | 42 +++++++++++++++++++++ 4 files changed, 108 insertions(+), 20 deletions(-) diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c index 5518544a4d..47224a2f27 100644 --- a/libavcodec/cbs_av1_syntax_template.c +++ b/libavcodec/cbs_av1_syntax_template.c @@ -1345,12 +1345,12 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw, fb(3, frame_to_show_map_idx); ref = &priv->ref[current->frame_to_show_map_idx]; - if (!ref->valid) { - av_log(ctx->log_ctx, AV_LOG_ERROR, "Missing reference frame needed for " - "show_existing_frame (frame_to_show_map_idx = %d).\n", - current->frame_to_show_map_idx); - return AVERROR_INVALIDDATA; - } + // if (!ref->valid) { + // av_log(ctx->log_ctx, AV_LOG_ERROR, "Missing reference frame needed for " + // "show_existing_frame (frame_to_show_map_idx = %d).\n", + // current->frame_to_show_map_idx); + // return AVERROR_INVALIDDATA; + // } if (seq->decoder_model_info_present_flag && !seq->timing_info.equal_picture_interval) { @@ -1361,7 +1361,7 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw, if (seq->frame_id_numbers_present_flag) fb(id_len, display_frame_id); - infer(frame_type, ref->frame_type); + // infer(frame_type, ref->frame_type); if (current->frame_type == AV1_FRAME_KEY) { infer(refresh_frame_flags, all_frames); @@ -1386,10 +1386,10 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw, } else infer(refresh_frame_flags, 0); - infer(frame_width_minus_1, ref->upscaled_width - 1); - infer(frame_height_minus_1, ref->frame_height - 1); - infer(render_width_minus_1, ref->render_width - 1); - infer(render_height_minus_1, ref->render_height - 1); + // infer(frame_width_minus_1, ref->upscaled_width - 1); + // infer(frame_height_minus_1, 
ref->frame_height - 1); + // infer(render_width_minus_1, ref->render_width - 1); + // infer(render_height_minus_1, ref->render_height - 1); // Section 7.20 goto update_refs; diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c index e5c0496f1c..fdaff3fff7 100644 --- a/libavcodec/vulkan_encode.c +++ b/libavcodec/vulkan_encode.c @@ -463,7 +463,10 @@ static int vulkan_encode_output(AVCodecContext *avctx, { VkResult ret; FFVulkanEncodePicture *vp = base_pic->priv; + FFHWBaseEncodeContext *base_ctx = avctx->priv_data; FFVulkanEncodeContext *ctx = avctx->priv_data; + AVPacket *pkt_ptr = pkt; + FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data; uint32_t *query_data; @@ -513,20 +516,56 @@ static int vulkan_encode_output(AVCodecContext *avctx, vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); } - pkt->data = sd_buf->mapped_mem; - pkt->size = vp->slices_offset + /* base offset */ - query_data[0] /* secondary offset */ + - query_data[1] /* size */; + if (vp->non_independent_frame) { + av_assert0(!ctx->prev_buf_ref); + size_t prev_buf_size = vp->slices_offset + query_data[0] + query_data[1]; + ctx->prev_buf_ref = vp->pkt_buf; + ctx->prev_buf_size = prev_buf_size; + vp->pkt_buf = NULL; - /* Move reference */ - pkt->buf = vp->pkt_buf; - vp->pkt_buf = NULL; + if (vp->tail_size) { + if (base_ctx->tail_pkt->size) + return AVERROR_BUG; + + ret = ff_get_encode_buffer(avctx, base_ctx->tail_pkt, vp->tail_size, 0); + if (ret < 0) + return ret; + + memcpy(base_ctx->tail_pkt->data, vp->tail_data, vp->tail_size); + pkt_ptr = base_ctx->tail_pkt; + } + } else { + if (ctx->prev_buf_ref) { + FFVkBuffer *prev_sd_buf = (FFVkBuffer *)ctx->prev_buf_ref->data; + size_t prev_size = ctx->prev_buf_size; + size_t size = (vp->slices_offset + query_data[0] + query_data[1]); + + ret = ff_get_encode_buffer(avctx, pkt, prev_size + size, 0); + if (ret < 0) + return ret; + + memcpy(pkt->data, prev_sd_buf->mapped_mem, prev_size); + memcpy(pkt->data + prev_size, 
sd_buf->mapped_mem, size); + + av_buffer_unref(&ctx->prev_buf_ref); + av_buffer_unref(&vp->pkt_buf); + } else { + pkt->data = sd_buf->mapped_mem; + pkt->size = vp->slices_offset + /* base offset */ + query_data[0] /* secondary offset */ + + query_data[1] /* size */; + + /* Move reference */ + pkt->buf = vp->pkt_buf; + vp->pkt_buf = NULL; + } + } av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n", base_pic->display_order, base_pic->encode_order); return ff_hw_base_encode_set_output_property(&ctx->base, avctx, - base_pic, pkt, + base_pic, pkt_ptr, ctx->codec->flags & VK_ENC_FLAG_NO_DELAY); } diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h index 3df06e11d0..d40e94fd67 100644 --- a/libavcodec/vulkan_encode.h +++ b/libavcodec/vulkan_encode.h @@ -57,6 +57,10 @@ typedef struct FFVulkanEncodePicture { FFVkExecContext *exec; AVBufferRef *pkt_buf; int slices_offset; + + int non_independent_frame; + char tail_data[16]; + size_t tail_size; } FFVulkanEncodePicture; /** @@ -163,9 +167,9 @@ typedef struct FFVkEncodeCommonOptions { } FFVkEncodeCommonOptions; typedef struct FFVulkanEncodeContext { + FFHWBaseEncodeContext base; FFVulkanContext s; FFVkVideoCommon common; - FFHWBaseEncodeContext base; const FFVulkanCodec *codec; int explicit_qp; @@ -192,6 +196,9 @@ typedef struct FFVulkanEncodeContext { FFVkExecPool enc_pool; FFHWBaseEncodePicture *slots[32]; + + AVBufferRef *prev_buf_ref; + size_t prev_buf_size; } FFVulkanEncodeContext; #define VULKAN_ENCODE_COMMON_OPTIONS \ diff --git a/libavcodec/vulkan_encode_av1.c b/libavcodec/vulkan_encode_av1.c index bb47ddd7f1..e94f9b1b21 100644 --- a/libavcodec/vulkan_encode_av1.c +++ b/libavcodec/vulkan_encode_av1.c @@ -80,6 +80,7 @@ typedef struct VulkanEncodeAV1Context { AV1RawOBU seq_hdr_obu; AV1RawOBU meta_cll_obu; AV1RawOBU meta_mastering_obu; + AV1RawOBU show_existing_obu; VkVideoEncodeAV1ProfileInfoKHR profile; @@ -172,6 +173,12 @@ static void set_name_slot(int slot, int *slot_indices, uint32_t 
allowed_idx, int av_assert0(0); } +static int vulkan_encode_av1_add_obu(AVCodecContext *, CodedBitstreamFragment *, + uint8_t, void *); + +static int vulkan_encode_av1_write_obu(AVCodecContext *, + uint8_t *, size_t *, + CodedBitstreamFragment *); static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, VkVideoEncodeInfoKHR *encode_info) @@ -542,6 +549,41 @@ static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, } } + FFVulkanEncodePicture *vp = pic->priv; + vp->tail_size = 0; + vp->non_independent_frame = pic->encode_order < pic->display_order; + int ret = 0; + + if (vp->non_independent_frame) { + CodedBitstreamFragment *current_obu = &enc->current_access_unit; + AV1RawOBU *fh_obu = &enc->show_existing_obu; + AV1RawFrameHeader *fh = &fh_obu->obu.frame_header; + + memset(fh_obu, 0, sizeof(*fh_obu)); + fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER; + fh_obu->header.obu_has_size_field = 1; + + fh->show_existing_frame = 1; + fh->frame_to_show_map_idx = ap->slot != 0; + fh->frame_type = AV1_FRAME_INTER; + fh->frame_width_minus_1 = avctx->width - 1; + fh->frame_height_minus_1 = avctx->height - 1; + fh->render_width_minus_1 = fh->frame_width_minus_1; + fh->render_height_minus_1 = fh->frame_height_minus_1; + + ((CodedBitstreamAV1Context *)enc->cbs->priv_data)->seen_frame_header = 0; + + ret = vulkan_encode_av1_add_obu(avctx, current_obu, AV1_OBU_FRAME_HEADER, fh_obu); + if (ret < 0) + goto end; + + ret = vulkan_encode_av1_write_obu(avctx, vp->tail_data, &vp->tail_size, current_obu); + +end: + ff_cbs_fragment_reset(current_obu); + return ret; + } + return 0; } -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
