PR #23476 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23476
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23476.patch


>From a9953c676276d2e0efec7095d792d87a3def0364 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:21:19 +0200
Subject: [PATCH 1/7] avformat/shared: clean up PENDING state on error/early
 exit

This makes sure threads that fail for reasons other than the underlying I/O
failing clean up after their own PENDING state on failure, unless another
thread updated the block state in the meantime.

Subsumes the existing "is_race" condition, which is inverted to "acquired"
that is 1 exactly when we were the thread that set the PENDING state.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 54 +++++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index a7dccb33df..47bc072f4f 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -600,7 +600,7 @@ static int shared_read(URLContext *h, unsigned char *buf, 
int size)
     Block *const block = &s->spacemap->blocks[block_id];
     unsigned short state = atomic_load_explicit(&block->state, 
memory_order_acquire);
     int64_t pending_since = 0;
-    int verify_read = 0, is_race = 0;
+    int verify_read = 0, acquired = 0;
 
 retry:
     switch (state) {
@@ -652,6 +652,7 @@ retry:
                                                   memory_order_acquire))
         {
             /* Acquired pending state, proceed to fetch the block */
+            acquired = 1;
             state = BLOCK_PENDING;
             break;
         }
@@ -661,14 +662,11 @@ retry:
     case BLOCK_PENDING:
         /* Another thread is busy fetching this block, wait for it to finish */
         if (!s->timeout) {
-            is_race = 1;
             break; /* no timeout requested, immediately race to fetch block */
         } else if (pending_since) {
             int64_t new = av_gettime_relative();
-            if (new - pending_since >= s->timeout) {
-                is_race = 1;
+            if (new - pending_since >= s->timeout)
                 break; /* timeout expired, try to fetch the block ourselves */
-            }
         } else {
             pending_since = av_gettime_relative();
         }
@@ -689,16 +687,8 @@ retry:
         if (inner_pos < 0) {
             av_log(h, AV_LOG_ERROR, "Failed to seek underlying protocol: %s\n",
                    av_err2str(inner_pos));
-            if (!read_only) {
-                /* Release pending state to avoid stalling other threads. Don't
-                 * mark this as failed, since the seek error may be unrelated 
to
-                 * the block and should probably be tried again. */
-                atomic_compare_exchange_strong_explicit(&block->state, &state,
-                                                        BLOCK_NONE,
-                                                        memory_order_relaxed,
-                                                        memory_order_relaxed);
-            }
-            return inner_pos;
+            ret = inner_pos;
+            goto soft_fail;
         }
 
         av_log(h, AV_LOG_DEBUG, "Inner seek to 0x%"PRIx64"\n", inner_pos);
@@ -708,8 +698,10 @@ retry:
     if (read_only) {
         /* Directly defer to the underlying protocol */
         ret = ffurl_read(s->inner, buf, size);
-        if (ret < 0)
+        if (ret < 0) {
+            av_assert1(!acquired);
             return ret;
+        }
 
         /* Verify the read data against the cached data if requested */
         if (verify_read && memcmp(buf, tmp, ret)) {
@@ -723,7 +715,7 @@ retry:
     }
 
     int write_back = 1;
-    if (s->cache_data && !is_race) {
+    if (s->cache_data && acquired) {
         /* Read directly into memory mapped cache file */
         tmp = s->cache_data + block_pos;
         write_back = 0;
@@ -745,15 +737,14 @@ retry:
         else if (ret < 0) {
             av_log(h, AV_LOG_ERROR, "Failed to read block 0x%"PRIx64": %s\n",
                    block_id, av_err2str(ret));
-            int new_state = BLOCK_FAILED;
             if (ret == AVERROR(EAGAIN) || ret == AVERROR_EXIT)
-                new_state = BLOCK_NONE; /* transient error, allow retries */
+                goto soft_fail;  /* transient error, allow retries */
 
             /* Try to mark block as failed; ignore errors - any mismatch
              * here will mean that either another thread already marked it
              * as failed, or successfully cached it in the meantime */
             atomic_compare_exchange_strong_explicit(&block->state, &state,
-                                                    new_state,
+                                                    BLOCK_FAILED,
                                                     memory_order_relaxed,
                                                     memory_order_relaxed);
             return ret;
@@ -767,7 +758,7 @@ retry:
         /* Learned location of true EOF, update filesize */
         ret = set_filesize(h, inner_pos + bytes_read);
         if (ret < 0)
-            return ret;
+            goto soft_fail;
     }
 
     if (bytes_read > 0) {
@@ -776,12 +767,7 @@ retry:
             av_log(h, AV_LOG_ERROR, "Failed to write to cache file: %s\n",
                    av_err2str(ret));
             s->write_err = 1;
-            /* Mark as NONE, not FAILED, since the block itself is fine -
-             * just absent from the cache. */
-            atomic_compare_exchange_strong_explicit(&block->state, &state,
-                                                    BLOCK_NONE,
-                                                    memory_order_relaxed,
-                                                    memory_order_relaxed);
+            goto soft_fail;
         } else {
             uint16_t crc = get_block_crc(tmp, bytes_read);
             av_log(h, AV_LOG_TRACE, "Cached %d bytes to block 0x%"PRIx64" at "
@@ -790,7 +776,8 @@ retry:
             atomic_store_explicit(&block->state, crc, memory_order_release);
         }
     } else {
-        return AVERROR_EOF;
+        ret = AVERROR_EOF;
+        goto soft_fail;
     }
 
     size = FFMIN(bytes_read - offset, size);
@@ -800,6 +787,17 @@ retry:
         memcpy(buf, &tmp[offset], size);
     s->pos += size;
     return size;
+
+soft_fail:
+    if (acquired) {
+        /* Release pending state on failure to avoid stalling other threads */
+        av_assert1(state == BLOCK_PENDING);
+        atomic_compare_exchange_strong_explicit(&block->state, &state,
+                                                BLOCK_NONE,
+                                                memory_order_relaxed,
+                                                memory_order_relaxed);
+    }
+    return ret;
 }
 
 static int64_t shared_seek(URLContext *h, int64_t pos, int whence)
-- 
2.52.0


>From b06eb2dc1d9d9023257cc91e3a54f4e6bd2d8319 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:25:45 +0200
Subject: [PATCH 2/7] avformat/shared: correctly early-exit on read past last
 block end

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index 47bc072f4f..a7e64063e5 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -629,6 +629,8 @@ retry:
 
         tmp += (ptrdiff_t) offset;
         size = FFMIN(size, block_size - offset);
+        if (size <= 0)
+            return AVERROR_EOF;
         if (s->verify) {
             verify_read = 1;
             break; /* fall through to the cache miss logic */
-- 
2.52.0


>From 9561532cfb5971ee90f21dc760bf794ffe7bbdeb Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:35:13 +0200
Subject: [PATCH 3/7] avformat/shared: don't acquire more blocks on already
 errored caches

Correctly mirrors the logic used to decide whether to write back to the
cache or not.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index a7e64063e5..a3bd08be3e 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -646,7 +646,7 @@ retry:
             return AVERROR(EIO);
         av_fallthrough;
     case BLOCK_NONE:
-        if (s->read_only)
+        if (s->read_only || s->write_err)
             break; /* don't mark block as pending */
         if (atomic_compare_exchange_weak_explicit(&block->state, &state,
                                                   BLOCK_PENDING,
-- 
2.52.0


>From 3e05c3d4ddcb687f765c757e74d35ed9de5d0789 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:43:01 +0200
Subject: [PATCH 4/7] avformat/shared: early exit on prior spacemap I/O failure

The code implicitly assumes that the caller won't re-call read()/seek()
after observing a prior AVERROR(EIO). However, this is not necessarily a
future-proof guarantee, so it's safer to explicitly re-error in this case.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index a3bd08be3e..c2a1d44fb6 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -580,6 +580,8 @@ static int shared_read(URLContext *h, unsigned char *buf, 
int size)
     SharedContext *s = h->priv_data;
     uint8_t *tmp;
     int ret;
+    if (!s->spacemap)
+        return AVERROR(EIO);
 
     if (size <= 0)
         return 0;
@@ -807,6 +809,8 @@ static int64_t shared_seek(URLContext *h, int64_t pos, int 
whence)
     SharedContext *s = h->priv_data;
     const int64_t filesize = get_filesize(h);
     int64_t res;
+    if (!s->spacemap)
+        return AVERROR(EIO);
 
     switch (whence) {
     case AVSEEK_SIZE:
-- 
2.52.0


>From 605db6a93b3b72d7b1f2a726adc0542c60c40e17 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:44:33 +0200
Subject: [PATCH 5/7] avformat/shared: hard-error on -cache_verify data
 mismatch

Makes this debug option way more useful. Also matches the existing behavior
on CRC mismatches.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index c2a1d44fb6..6c3d25df32 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -712,6 +712,7 @@ retry:
             av_log(h, AV_LOG_ERROR, "Cache verification failed for %d bytes "
                    "in block 0x%"PRIx64" at offset 0x%"PRIx64" + %"PRId64"!\n",
                    ret, block_id, block_pos, offset);
+            return AVERROR(EIO);
         }
 
         s->pos = s->inner_pos = inner_pos + ret;
-- 
2.52.0


>From 67f84a1cb488b9f8b2db992773a1e104edbc172b Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 11:51:22 +0200
Subject: [PATCH 6/7] avformat/shared: allow interruption during BLOCK_PENDING
 read loop

Also allows nonblocking calls.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index 6c3d25df32..49b9112d8d 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -675,8 +675,14 @@ retry:
             pending_since = av_gettime_relative();
         }
 
+        if (h->flags & AVIO_FLAG_NONBLOCK)
+            return AVERROR(EAGAIN);
+
         /* Make sure we try a few times before giving up */
         av_usleep(s->timeout >> 4);
+        if (ff_check_interrupt(&h->interrupt_callback))
+            return AVERROR_EXIT;
+
         state = atomic_load_explicit(&block->state, memory_order_acquire);
         goto retry;
     }
-- 
2.52.0


>From c868fd375e16379e2c3e31e85e4ecd092a4a1a85 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Sun, 14 Jun 2026 12:03:23 +0200
Subject: [PATCH 7/7] avformat/shared: don't consider EINTR a hard failure

Don't trigger `write_err` for this transient failure.

Sponsored-by: nxtedition AB
Signed-off-by: Niklas Haas <[email protected]>
---
 libavformat/shared.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavformat/shared.c b/libavformat/shared.c
index 49b9112d8d..417622c7d4 100644
--- a/libavformat/shared.c
+++ b/libavformat/shared.c
@@ -775,9 +775,11 @@ retry:
     if (bytes_read > 0) {
         ret = write_back ? write_cache(s, tmp, bytes_read, block_pos) : 0;
         if (ret < 0) {
-            av_log(h, AV_LOG_ERROR, "Failed to write to cache file: %s\n",
-                   av_err2str(ret));
-            s->write_err = 1;
+            if (ret != AVERROR(EINTR)) {
+                av_log(h, AV_LOG_ERROR, "Failed to write to cache file: %s\n",
+                    av_err2str(ret));
+                s->write_err = 1;
+            }
             goto soft_fail;
         } else {
             uint16_t crc = get_block_crc(tmp, bytes_read);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to