From: boxu <[email protected]> The VideoToolbox encoder can produce non-monotonic DTS values when B-frames are enabled, causing playback issues. This patch implements a complete timestamp reset strategy to ensure correct PTS/DTS ordering.
The fix: - Generates monotonic DTS values based on decode order (frame counter) - Preserves VideoToolbox's presentation order for PTS - Adds offset (max_b_frames) to ensure DTS <= PTS for all frames - Normalizes timestamps to start from zero Testing command: ffmpeg -hwaccel videotoolbox -i input.mov -c:v h264_videotoolbox -profile:v high -bf 3 -b:v 2M -y output.mov Before this patch: [vost#0:0/h264_videotoolbox @ 0xc61058000] Invalid DTS: 1024 PTS: 512, replacing by guess [vost#0:0/h264_videotoolbox @ 0xc61058000] Invalid DTS: 3072 PTS: 2560, replacing by guess [vost#0:0/h264_videotoolbox @ 0xc61058000] Invalid DTS: 5632 PTS: 5120, replacing by guess [vost#0:0/h264_videotoolbox @ 0xc61058000] Invalid DTS: 7680 PTS: 7168, replacing by guess [vost#0:0/h264_videotoolbox @ 0xc61058000] Invalid DTS: 10240 PTS: 9728, replacing by guess After this patch: No DTS errors, clean output with proper muxing. Tested on macOS with B-frame encoding enabled. Signed-off-by: boxu <[email protected]> --- libavcodec/videotoolboxenc.c | 53 ++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/libavcodec/videotoolboxenc.c b/libavcodec/videotoolboxenc.c index 729072c0b9..a360867a61 100644 --- a/libavcodec/videotoolboxenc.c +++ b/libavcodec/videotoolboxenc.c @@ -264,6 +264,13 @@ typedef struct VTEncContext { int64_t first_pts; int64_t dts_delta; + int64_t last_dts; // Track last DTS for B-frame monotonicity + int64_t last_non_b_dts; // Track last I/P frame DTS for B-frame reference + + // Full PTS/DTS reset for B-frames + int64_t base_pts; // Base PTS value (first frame's PTS) + int64_t base_dts; // Base DTS value (always 0) + int64_t vt_pts_first; // VideoToolbox's first frame PTS for offset calculation int profile; int level; @@ -2320,10 +2327,43 @@ static int vtenc_cm_to_avpacket( } } - dts_delta = vtctx->dts_delta >= 0 ? 
vtctx->dts_delta : 0; time_base_num = avctx->time_base.num; - pkt->pts = pts.value / time_base_num; - pkt->dts = dts.value / time_base_num - dts_delta; + dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0; + int64_t vt_pts = pts.value / time_base_num; + int64_t vt_dts = dts.value / time_base_num; + + // VideoToolbox with B-frames: Fully reset both PTS and DTS from scratch + // Strategy: Maintain our own base timestamps and completely regenerate values + // - DTS = frame_ct_out (decode order, always monotonic) + // - PTS = preserve VideoToolbox's presentation order + offset for B-frames + // Key insight: Add offset equal to max_b_frames so that earliest B-frames have PTS > DTS + if (vtctx->has_b_frames) { + // First frame: record VideoToolbox's PTS offset for normalization + if (vtctx->vt_pts_first == AV_NOPTS_VALUE) { + vtctx->vt_pts_first = vt_pts; + // Add offset equal to max_b_frames to ensure DTS <= PTS for all frames + // This gives B-frames enough "headroom" to be decoded before display + vtctx->base_pts = avctx->max_b_frames; + vtctx->base_dts = 0; // DTS starts from 0 + av_log(avctx, AV_LOG_DEBUG, "First frame: VT_PTS=%lld, VT_DTS=%lld, setting base_pts=%lld (max_b_frames=%d)\n", + vt_pts, vt_dts, vtctx->base_pts, avctx->max_b_frames); + } + + // Generate DTS from frame counter (decode order: 0, 1, 2, 3, ...) 
+ // Note: frame_ct_out starts at 1, so subtract 1 to start DTS from 0 + pkt->dts = vtctx->base_dts + (vtctx->frame_ct_out - 1); + + // Generate PTS preserving VideoToolbox's presentation order with B-frame offset + // Normalize VideoToolbox's PTS and add base_pts offset + pkt->pts = vtctx->base_pts + (vt_pts - vtctx->vt_pts_first); + + av_log(avctx, AV_LOG_DEBUG, "Reset timestamps: frame=%lld, VT_PTS=%lld, VT_DTS=%lld, final_PTS=%lld, final_DTS=%lld\n", + vtctx->frame_ct_out, vt_pts, vt_dts, pkt->pts, pkt->dts); + } else { + // No B-frames: use VideoToolbox's timestamps with adjustment + pkt->pts = vt_pts; + pkt->dts = vt_dts - dts_delta; + } return 0; } @@ -2828,6 +2868,13 @@ pe_cleanup: } vtctx->frame_ct_out = 0; + vtctx->last_dts = AV_NOPTS_VALUE; + vtctx->last_non_b_dts = AV_NOPTS_VALUE; + + // Initialize PTS/DTS reset variables + vtctx->base_pts = 0; + vtctx->base_dts = 0; + vtctx->vt_pts_first = AV_NOPTS_VALUE; av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0)); if (!status) -- 2.39.5 (Apple Git-154) _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
