diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 7e96472..cbc194c 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -878,6 +878,7 @@ static int mpegts_write_packet(AVFormatContext *s, AVPacket *pkt)
     uint8_t *buf= pkt->data;
     uint8_t *data= NULL;
     MpegTSWriteStream *ts_st = st->priv_data;
+    const uint64_t max_delay = av_rescale(s->max_delay, 90000, AV_TIME_BASE);
     const uint64_t delay = av_rescale(s->max_delay, 90000, AV_TIME_BASE)*2;
     int64_t dts = AV_NOPTS_VALUE, pts = AV_NOPTS_VALUE;
 
@@ -947,6 +948,18 @@ static int mpegts_write_packet(AVFormatContext *s, AVPacket *pkt)
         }
     }
 
+    /*
+     * Flush the buffered audio packets once adding this packet would exceed
+     * a full PES payload, or once this packet's DTS is more than max_delay
+     * (rescaled to 90 kHz ticks) past the DTS of the first buffered packet.
+     */
+    if ((ts_st->payload_index + size > DEFAULT_PES_PAYLOAD_SIZE) ||
+        ((ts_st->payload_index > 0) && (dts - ts_st->payload_dts > max_delay))) {
+        mpegts_write_pes(s, st, ts_st->payload, ts_st->payload_index,
+                         ts_st->payload_pts, ts_st->payload_dts);
+        ts_st->payload_index = 0;
+    }
+
     if (st->codec->codec_type != AVMEDIA_TYPE_AUDIO) {
         // for video and subtitle, write a single pes packet
         mpegts_write_pes(s, st, buf, size, pts, dts);
@@ -954,12 +967,6 @@ static int mpegts_write_packet(AVFormatContext *s, AVPacket *pkt)
         return 0;
     }
 
-    if (ts_st->payload_index + size > DEFAULT_PES_PAYLOAD_SIZE) {
-        mpegts_write_pes(s, st, ts_st->payload, ts_st->payload_index,
-                         ts_st->payload_pts, ts_st->payload_dts);
-        ts_st->payload_index = 0;
-    }
-
     if (!ts_st->payload_index) {
         ts_st->payload_pts = pts;
         ts_st->payload_dts = dts;