From 9de9d327883d649d8ecaad2194e6bead79600194 Mon Sep 17 00:00:00 2001
From: Joe Koberg <jkoberg@gmail.com>
Date: Thu, 15 Mar 2018 13:08:14 -0400
Subject: [PATCH] Introduce -force_dts_monotonicity to fix timestamp /
 non-monotonous dts

Applies a timestamp offset to incoming packets to sanitize discontinuities
in concatenated files or from containers such as HLS or MPEGTS.

The offset is always updated when DTS appears to go backwards, and is
updated when PTS or DTS jumps forward more than a second (based on the
default value for the -dts_monotonicity_threshold parameter)

Co-authored-by: Ibrahim Moreno <imoreno138@gmail.com>
Co-authored-by: Will Crick <talmadgecrick@gmail.com>
---
 fftools/ffmpeg.c     | 54 +++++++++++++++++++++++++++++++++++++++++++++++-----
 fftools/ffmpeg.h     |  7 +++++++
 fftools/ffmpeg_opt.c |  8 ++++++++
 3 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index ee7258fcd1..b450ccf25b 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -4387,10 +4387,14 @@ static int process_input(int file_index)
         pkt.dts *= ist->ts_scale;
 
     pkt_dts = av_rescale_q_rnd(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-    if ((ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
-         ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) &&
-        pkt_dts != AV_NOPTS_VALUE && ist->next_dts == AV_NOPTS_VALUE && !copy_ts
-        && (is->iformat->flags & AVFMT_TS_DISCONT) && ifile->last_ts != AV_NOPTS_VALUE) {
+    if ((ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO || ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) &&
+        pkt_dts != AV_NOPTS_VALUE &&
+        ist->next_dts == AV_NOPTS_VALUE &&
+        !copy_ts &&
+        (is->iformat->flags & AVFMT_TS_DISCONT) &&
+        ifile->last_ts != AV_NOPTS_VALUE &&
+        !force_dts_monotonicity
+    ) {
         int64_t delta   = pkt_dts - ifile->last_ts;
         if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
             delta >  1LL*dts_delta_threshold*AV_TIME_BASE){
@@ -4418,7 +4422,9 @@ static int process_input(int file_index)
     if ((ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
          ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) &&
          pkt_dts != AV_NOPTS_VALUE && ist->next_dts != AV_NOPTS_VALUE &&
-        !copy_ts) {
+        !copy_ts &&
+        !force_dts_monotonicity
+    ) {
         int64_t delta   = pkt_dts - ist->next_dts;
         if (is->iformat->flags & AVFMT_TS_DISCONT) {
             if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
@@ -4453,6 +4459,44 @@ static int process_input(int file_index)
     if (pkt.dts != AV_NOPTS_VALUE)
         ifile->last_ts = av_rescale_q(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q);
 
+
+    if (force_dts_monotonicity &&
+        (pkt.pts != AV_NOPTS_VALUE || pkt.dts != AV_NOPTS_VALUE) &&
+        (ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO || ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO)
+    ) {
+        int64_t ff_pts_error = 0;
+        int64_t ff_dts_error = 0;
+        int64_t ff_dts_threshold = av_rescale_q(dts_monotonicity_threshold, AV_TIME_BASE_Q, ist->st->time_base);
+
+        // adjust the incoming packet by the accumulated monotonicity error
+        if (pkt.pts != AV_NOPTS_VALUE) {
+            pkt.pts += ifile->ff_timestamp_monotonicity_offset;
+            if (ist->next_pts != AV_NOPTS_VALUE) {
+                ff_pts_error = av_rescale_q(ist->next_pts, AV_TIME_BASE_Q, ist->st->time_base) - pkt.pts;
+            }
+        }
+        if (pkt.dts != AV_NOPTS_VALUE) {
+            pkt.dts += ifile->ff_timestamp_monotonicity_offset;
+            if (ist->next_dts != AV_NOPTS_VALUE) {
+                ff_dts_error = av_rescale_q(ist->next_dts, AV_TIME_BASE_Q, ist->st->time_base) - pkt.dts;
+            }
+        }
+
+        if(ff_dts_error > 0 || ff_dts_error < (-ff_dts_threshold) || ff_pts_error < (-ff_dts_threshold)) {
+            if(pkt.dts == AV_NOPTS_VALUE /*  || ist->next_dts != AV_NOPTS_VALUE */ ) {
+                pkt.pts += ff_pts_error;
+                ifile->ff_timestamp_monotonicity_offset += ff_pts_error;
+                av_log(is, AV_LOG_INFO, "Incoming PTS error %"PRId64", offsetting subsequent timestamps by %"PRId64" to correct\n", ff_pts_error, ifile->ff_timestamp_monotonicity_offset);
+            }
+            else {
+                pkt.pts += ff_dts_error;
+                pkt.dts += ff_dts_error;
+                ifile->ff_timestamp_monotonicity_offset += ff_dts_error;
+                av_log(is, AV_LOG_INFO, "Incoming DTS error %"PRId64", offsetting subsequent timestamps by %"PRId64" to correct\n", ff_dts_error, ifile->ff_timestamp_monotonicity_offset);
+            }
+        }
+    }
+
     if (debug_ts) {
         av_log(NULL, AV_LOG_INFO, "demuxer+ffmpeg -> ist_index:%d type:%s pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s off:%s off_time:%s\n",
                ifile->ist_index + pkt.stream_index, av_get_media_type_string(ist->dec_ctx->codec_type),
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 8195f73e8b..a9337a1a96 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -419,6 +419,10 @@ typedef struct InputFile {
     int joined;                 /* the thread has been joined */
     int thread_queue_size;      /* maximum number of queued packets */
 #endif
+
+    // A value added to inbound timestamps to prevent them from going "backward" in cases such as HLS discontinuities
+    int64_t ff_timestamp_monotonicity_offset;
+
 } InputFile;
 
 enum forced_keyframes_const {
@@ -586,6 +590,9 @@ extern float audio_drift_threshold;
 extern float dts_delta_threshold;
 extern float dts_error_threshold;
 
+extern int dts_monotonicity_threshold;
+extern int force_dts_monotonicity; 
+
 extern int audio_volume;
 extern int audio_sync_method;
 extern int video_sync_method;
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index d7a7eb0662..37b51d0589 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -87,6 +87,9 @@ float audio_drift_threshold = 0.1;
 float dts_delta_threshold   = 10;
 float dts_error_threshold   = 3600*30;
 
+int dts_monotonicity_threshold = AV_TIME_BASE;
+int force_dts_monotonicity = 0;
+
 int audio_volume      = 256;
 int audio_sync_method = 0;
 int video_sync_method = VSYNC_AUTO;
@@ -1154,6 +1157,7 @@ static int open_input_file(OptionsContext *o, const char *filename)
     f->recording_time = o->recording_time;
     f->input_ts_offset = o->input_ts_offset;
     f->ts_offset  = o->input_ts_offset - (copy_ts ? (start_at_zero && ic->start_time != AV_NOPTS_VALUE ? ic->start_time : 0) : timestamp);
+    f->ff_timestamp_monotonicity_offset = 0;
     f->nb_streams = ic->nb_streams;
     f->rate_emu   = o->rate_emu;
     f->accurate_seek = o->accurate_seek;
@@ -3403,6 +3407,10 @@ const OptionDef options[] = {
     { "apad",           OPT_STRING | HAS_ARG | OPT_SPEC |
                         OPT_OUTPUT,                                  { .off = OFFSET(apad) },
         "audio pad", "" },
+    { "force_dts_monotonicity", OPT_BOOL | OPT_EXPERT,               { &force_dts_monotonicity },
+        "correct dts monotonicity errors" },
+    { "dts_monotonicity_threshold",    HAS_ARG | OPT_INT | OPT_EXPERT,{ &dts_monotonicity_threshold },
+        "dts correction threshold for forward jumps", "microseconds" },
     { "dts_delta_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT,       { &dts_delta_threshold },
         "timestamp discontinuity delta threshold", "threshold" },
     { "dts_error_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT,       { &dts_error_threshold },
-- 
2.16.2