PR #21558 opened by Niklas Haas (haasn) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21558 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21558.patch
Currently, the HTTP protocol will fully tear down and re-establish the entire connection for every single seek, as a result of using unbounded range requests. This is very inefficient, especially for the common subset of formats that require (frequently) seeking to the end of the file. For example, reading a simple MXF file currently opens 5 separate connections, due to repeated seeks back and forth between the start and end. We can dramatically improve the status quo by using keep-alive connections (already mapped to the existing option `-multiple_requests 1`, but not really used for anything useful at the moment) and sending partial requests. I arbitrarily set the default request size to 32 kB, which is in the same order of magnitude as the TCP packet limit (64 kB), and matches the default AVIO buffer size (32 kB). To prevent re-requests for additional data from becoming too much of a bottleneck, we also grow this size exponentially with every subsequent request (reset on a seek). To really be useful for seeking, the user ideally should also set the `-short_seek_size` option to something above the request size. (Maybe we should also default this option to e.g. 1 MB for HTTP connections?) After these patches, FFmpeg will rarely open more than a single connection per source, even for seek-heavy formats like MXF. >From 88aa6f22006350c13d226fc4b5a083c95c1b3d77 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Thu, 22 Jan 2026 15:31:02 +0100 Subject: [PATCH 1/7] avformat/http: fix noop seek check The existing check fails to consider the case of whence == SEEK_END and the resulting offset happening to exactly match the current position. Reorder the check to compute the target position first, then compare. 
--- libavformat/http.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavformat/http.c b/libavformat/http.c index 6ee498b4df..f565e1e7fa 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -2008,10 +2008,6 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo if (whence == AVSEEK_SIZE) return s->filesize; - else if (!force_reconnect && - ((whence == SEEK_CUR && off == 0) || - (whence == SEEK_SET && off == s->off))) - return s->off; else if ((s->filesize == UINT64_MAX && whence == SEEK_END)) return AVERROR(ENOSYS); @@ -2023,6 +2019,8 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo return AVERROR(EINVAL); if (off < 0) return AVERROR(EINVAL); + if (!force_reconnect && off == s->off) + return s->off; s->off = off; if (s->off && h->is_streamed) -- 2.52.0 >From 12fe7bad775637c1c195e66e1915b84e042a489c Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 10:25:01 +0100 Subject: [PATCH 2/7] avformat/http: print error on HTTP response failure This currently fails silently with zero indication of what the problem might be, which tripped me up a bit while debugging. 
--- libavformat/http.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libavformat/http.c b/libavformat/http.c index f565e1e7fa..03f417100f 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -1419,8 +1419,11 @@ static int http_read_header(URLContext *h) for (;;) { int parsed_http_code = 0; - if ((err = http_get_line(s, line, sizeof(line))) < 0) + if ((err = http_get_line(s, line, sizeof(line))) < 0) { + av_log(h, AV_LOG_ERROR, "Error reading HTTP response: %s\n", + av_err2str(err)); return err; + } av_log(h, AV_LOG_TRACE, "header='%s'\n", line); -- 2.52.0 >From 3c999fcd7f9e29aee8a90d55ad71c399f547c0bd Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 10:37:02 +0100 Subject: [PATCH 3/7] avformat/http: parse range size from Content-Range header In the event that the range returned is smaller than the true filesize, we should only expect to receive that many bytes - not the entire rest of the file. This commit is theoretically non-functional on its own, since any conforming HTTP server will always return us the full file range, but I wanted to split it off from the subsequent changes in order to make review easier. --- libavformat/http.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libavformat/http.c b/libavformat/http.c index 03f417100f..0fbf84165a 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -78,7 +78,7 @@ typedef struct HTTPContext { /* Used if "Transfer-Encoding: chunked" otherwise -1. 
*/ uint64_t chunksize; int chunkend; - uint64_t off, end_off, filesize; + uint64_t off, end_off, filesize, range_end; char *uri; char *location; HTTPAuthState auth_state; @@ -892,11 +892,13 @@ static int parse_location(HTTPContext *s, const char *p) static void parse_content_range(URLContext *h, const char *p) { HTTPContext *s = h->priv_data; - const char *slash; + const char *slash, *end; if (!strncmp(p, "bytes ", 6)) { p += 6; s->off = strtoull(p, NULL, 10); + if ((end = strchr(p, '-')) && strlen(end) > 0) + s->range_end = strtoull(end + 1, NULL, 10) + 1; if ((slash = strchr(p, '/')) && strlen(slash) > 0) s->filesize_from_content_range = strtoull(slash + 1, NULL, 10); } @@ -1702,8 +1704,9 @@ static int http_buf_read(URLContext *h, uint8_t *buf, int size) memcpy(buf, s->buf_ptr, len); s->buf_ptr += len; } else { - uint64_t target_end = s->end_off ? s->end_off : s->filesize; - if ((!s->willclose || s->chunksize == UINT64_MAX) && s->off >= target_end) + uint64_t file_end = s->end_off ? s->end_off : s->filesize; + uint64_t target_end = s->range_end ? s->range_end : file_end; + if ((!s->willclose || s->chunksize == UINT64_MAX) && s->off >= file_end) return AVERROR_EOF; len = ffurl_read(s->hd, buf, size); if ((!len || len == AVERROR_EOF) && -- 2.52.0 >From f20834c0b58560d3c06e16485a213e4b8500a295 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 11:16:16 +0100 Subject: [PATCH 4/7] avformat/http: return EIO if s->hd is NULL This could conceivably happen currently if the user tries reading more bytes after the last chunk has already been received. In this case, we currently segfault - but simply returning AVERROR(EIO) seems more reasonable and lets the higher layer retry the connection. 
--- libavformat/http.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavformat/http.c b/libavformat/http.c index 0fbf84165a..16da46a0e1 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -1658,6 +1658,9 @@ static int http_buf_read(URLContext *h, uint8_t *buf, int size) HTTPContext *s = h->priv_data; int len; + if (!s->hd) + return AVERROR(EIO); + if (s->chunksize != UINT64_MAX) { if (s->chunkend) { return AVERROR_EOF; -- 2.52.0 >From 01a1d13e8cbe4d91df9ac095eebd7457b6f6996a Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 10:43:24 +0100 Subject: [PATCH 5/7] avformat/http: request more data after partial response If the Content-Range indicates a smaller range than what we expected, we should send a new request for the remainder before attempting to read more. Again, this commit is theoretically non-functional on its own, since any conforming HTTP server should give us the entire range we asked for in the first place, but it is semantically independent from and prepares us for the following changes. --- libavformat/http.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libavformat/http.c b/libavformat/http.c index 16da46a0e1..e813bccfe1 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -1711,6 +1711,8 @@ static int http_buf_read(URLContext *h, uint8_t *buf, int size) uint64_t target_end = s->range_end ? 
s->range_end : file_end; if ((!s->willclose || s->chunksize == UINT64_MAX) && s->off >= file_end) return AVERROR_EOF; + if (s->off == target_end && target_end < file_end) + return AVERROR(EAGAIN); /* reached end of content range */ len = ffurl_read(s->hd, buf, size); if ((!len || len == AVERROR_EOF) && (!s->willclose || s->chunksize == UINT64_MAX) && s->off < target_end) { @@ -1795,6 +1797,14 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size) if (read_ret == AVERROR_EXIT) break; + else if (read_ret == AVERROR(EAGAIN)) { + /* send new request for more data on existing connection */ + AVDictionary *options = NULL; + read_ret = http_open_cnx(h, &options); + av_dict_free(&options); + if (read_ret == 0) + goto retry; + } if (h->is_streamed && !s->reconnect_streamed) break; @@ -1824,6 +1834,7 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size) return read_ret; } +retry: read_ret = http_buf_read(h, buf, size); } -- 2.52.0 >From d975498c2f8283b259f9eea58639e43c0a6ed8c1 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 10:46:55 +0100 Subject: [PATCH 6/7] avformat/http: re-use keep-alive connection for small seeks When the previous reply was a partial response (e.g. due to a seek to the end of the file), and the remaining data from that partial response is below the short seek size threshold, we can serve this seek by just draining that data and re-using the existing connection. This can currently only happen when using keep-alive connections (-multiple_requests 1) and seeking from the end of the file to somewhere else, in which case the file's tail can be drained and the connection re-used. Under other circumstances, however, we still need to force a reconnection, because we do not yet send partial range requests. 
(This will be changed in the following commit) We need to take special care not to regress the existing fallback logic for when `http_open_cnx` fails, so here is a quick case analysis: non-drain path: - identical to the current soft drain fails: (ffurl_read error path) - s->hd = old_hd = NULL - http_open_cnx() always opens a new connection - on failure, old buffer is restored and s->hd remains NULL soft drain succeeds, http_open_cnx() fails: - s->hd is set to NULL by http_open_cnx() failure path - old_hd was never set, so remains NULL - old buffer is restored, s->hd remains NULL In either case, the outcome is that any (previously valid) buffer is left as-is, the offset is unchanged, and the connection ends up closed (s->hd == NULL). This is okay to do after the previous change to http_buf_read, which allows it to internally re-open the connection if needed. --- libavformat/http.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/libavformat/http.c b/libavformat/http.c index e813bccfe1..83f87f8b85 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -2020,11 +2020,12 @@ static int http_close(URLContext *h) static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int force_reconnect) { HTTPContext *s = h->priv_data; - URLContext *old_hd = s->hd; + URLContext *old_hd = NULL; uint64_t old_off = s->off; uint8_t old_buf[BUFFER_SIZE]; int old_buf_size, ret; AVDictionary *options = NULL; + uint8_t discard[4096]; if (whence == AVSEEK_SIZE) return s->filesize; @@ -2066,7 +2067,27 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo /* we save the old context in case the seek fails */ old_buf_size = s->buf_end - s->buf_ptr; memcpy(old_buf, s->buf_ptr, old_buf_size); - s->hd = NULL; + + /* try to reuse existing connection for small seeks */ + uint64_t remaining = s->range_end - old_off - old_buf_size; + if (!s->willclose && s->range_end && remaining <= s->short_seek_size) { + /* 
drain remaining data left on the wire from previous request */ + av_log(h, AV_LOG_DEBUG, "Soft-seeking to offset %"PRIu64" by draining " + "%"PRIu64" remaining byte(s)\n", s->off, remaining); + while (remaining) { + int ret = ffurl_read(s->hd, discard, FFMIN(remaining, sizeof(discard))); + if (ret < 0 || ret == AVERROR_EOF || (ret == 0 && remaining)) { + /* connection broken or stuck, need to reopen */ + ffurl_closep(&s->hd); + break; + } + remaining -= ret; + } + } else { + /* can't soft seek; always open new connection */ + old_hd = s->hd; + s->hd = NULL; + } /* if it fails, continue on old connection */ if ((ret = http_open_cnx(h, &options)) < 0) { -- 2.52.0 >From 8b0d9a211800a382ca14e737491656dbda63fc45 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 23 Jan 2026 11:19:17 +0100 Subject: [PATCH 7/7] avformat/http: allow limiting request size Sometimes, HTTP sources require a lot of seeking (especially for formats like MXF). Currently, we need to completely tear down and re-establish the connection most times this happens, which puts a lot of stress on the network stack and also results in transmission of possibly many unnecessary bytes. This patch adds an option to allow FFmpeg to request partial ranges, starting with a configurable window (e.g. 16 kB) and growing exponentially as we perform subsequent requests without seeking. Enabled by default if -multiple_requests 1 is set, with the default request size being 32 kB (matching the AVIO default buffer size). --- libavformat/http.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/libavformat/http.c b/libavformat/http.c index 83f87f8b85..229c25e0f7 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -101,6 +101,10 @@ typedef struct HTTPContext { int end_header; /* A flag which indicates if we use persistent connections. */ int multiple_requests; + /* Minimum and maximum request size (for partial requests). 
*/ + uint64_t request_size_min; + uint64_t request_size_max; + uint64_t request_size; ///< current request size, resets on seek uint8_t *post_data; int post_datalen; int is_akamai; @@ -162,6 +166,8 @@ static const AVOption options[] = { { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D }, { "referer", "override referer header", OFFSET(referer), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D }, { "multiple_requests", "use persistent connections", OFFSET(multiple_requests), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D | E }, + { "request_size_min", "minimum size (in bytes) of each request", OFFSET(request_size_min), AV_OPT_TYPE_INT64, { .i64 = 1 << 15 }, 1024, INT64_MAX, D }, + { "request_size_max", "maximum size (in bytes) of each request", OFFSET(request_size_max), AV_OPT_TYPE_INT64, { .i64 = INT64_MAX }, 1024, INT64_MAX, D }, { "post_data", "set custom HTTP post data", OFFSET(post_data), AV_OPT_TYPE_BINARY, .flags = D | E }, { "mime_type", "export the MIME type", OFFSET(mime_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY }, { "http_version", "export the http response version", OFFSET(http_version), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY }, @@ -754,6 +760,17 @@ static int http_open(URLContext *h, const char *uri, int flags, else h->is_streamed = 1; + if (s->multiple_requests && s->seekable != 0) { + if (s->request_size_max < s->request_size_min) { + av_log(h, AV_LOG_ERROR, + "Invalid request size range: min %"PRIu64" > max %"PRIu64"\n", + s->request_size_min, s->request_size_max); + return AVERROR(EINVAL); + } + + s->request_size = s->request_size_min; + } + s->filesize = UINT64_MAX; s->location = av_strdup(uri); @@ -1455,6 +1472,9 @@ static int http_read_header(URLContext *h) if (s->seekable == -1 && s->is_mediagateway && s->filesize == 2000000000) h->is_streamed = 1; /* we can in fact _not_ seek */ + if 
(s->willclose || h->is_streamed) + s->request_size = 0; /* no point in using multiple requests */ + // add any new cookies into the existing cookie string cookie_string(s->cookie_dict, &s->cookies); av_dict_free(&s->cookie_dict); @@ -1562,9 +1582,19 @@ static int http_connect(URLContext *h, const char *path, const char *local_path, // Note: we send the Range header on purpose, even when we're probing, // since it allows us to detect more reliably if a (non-conforming) // server supports seeking by analysing the reply headers. - if (!has_header(s->headers, "\r\nRange: ") && !post && (s->off > 0 || s->end_off || s->seekable != 0)) { + if (!has_header(s->headers, "\r\nRange: ") && !post && + (s->off > 0 || s->end_off || s->request_size || s->seekable != 0)) + { av_bprintf(&request, "Range: bytes=%"PRIu64"-", s->off); - if (s->end_off) + if (s->request_size) { + uint64_t target_off = s->off + s->request_size; + if (target_off < s->off) /* overflow */ + target_off = UINT64_MAX; + if (s->end_off) + target_off = FFMIN(target_off, s->end_off); + if (target_off != UINT64_MAX) + av_bprintf(&request, "%"PRId64, target_off - 1); + } else if (s->end_off) av_bprintf(&request, "%"PRId64, s->end_off - 1); av_bprintf(&request, "\r\n"); } @@ -1647,6 +1677,14 @@ static int http_connect(URLContext *h, const char *path, const char *local_path, s->off = off; err = (off == s->off) ? 
0 : -1; + if (!err && s->request_size) { + /* double request size after successful request */ + uint64_t next_size = s->request_size << 1; + if (next_size < s->request_size) /* overflow */ + next_size = UINT64_MAX; + s->request_size = FFMIN(next_size, s->request_size_max); + } + done: av_freep(&authstr); av_freep(&proxyauthstr); @@ -2047,6 +2085,9 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo if (s->off && h->is_streamed) return AVERROR(ENOSYS); + if (s->request_size) /* reset request size on seek */ + s->request_size = s->request_size_min; + /* do not try to make a new connection if seeking past the end of the file */ if (s->end_off || s->filesize != UINT64_MAX) { uint64_t end_pos = s->end_off ? s->end_off : s->filesize; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
