PR #23173 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23173
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23173.patch

Normally, over-reading garbage data from the padding region is fine, even when 
horizontally filtering; because they are explicitly multiplied by 0 in the 
latter case. However, 0 * NaN = 0 * Infinity = NaN; and x + NaN = NaN, so this 
corrupts the entire filtered pixel.

I tried unsuccessfully to solve this is a number of different ways (e.g. 
adjusting the filter offsets and weights backwards to avoid reading into the 
padding bytes at all), but those attempts all came with other edge cases (e.g. 
if the image itself is smaller than the number of aligned filter taps). I think 
the only truly robust solution here is to just force the copy into the tail 
buffer.

As an aside, the tail buffer is always zero-initialized, and will never contain 
garbage data inside the padding bytes, because we don't copy any extra pixels.


>From 200592d1883560bd27d52717be39f9e80a3e791e Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Wed, 20 May 2026 18:13:32 +0200
Subject: [PATCH 1/2] swscale/ops_dispatch: directly communicate extra filter
 taps

over_read is arguably a clumsy way of solving this; fundamentally this field
is about communicating that the filter requires access to additional taps
beyond what is available in the filter itself.

This shouldn't actually change anything, as the accounting is just moved
from `over_read` to `filter_read_h`, but makes the code a bit simpler
and more transparent.

Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_chain.h    | 1 +
 libswscale/ops_dispatch.c | 9 +++++----
 libswscale/ops_dispatch.h | 1 +
 libswscale/x86/ops.c      | 5 ++---
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index dcd9a9fc74..08e29e8fb1 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -114,6 +114,7 @@ typedef struct SwsImplResult {
     void (*free)(SwsOpPriv *priv); /* free function for `priv` */
     int over_read;  /* implementation over-reads input by this many bytes */
     int over_write; /* implementation over-writes output by this many bytes */
+    int extra_taps; /* extra filter taps needed for read */
 } SwsImplResult;
 
 typedef struct SwsOpEntry {
diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index f477c7839a..ab56bacaf1 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -45,7 +45,7 @@ typedef struct SwsOpPass {
     int idx_in[4];
     int idx_out[4];
     int *offsets_y;
-    int filter_size;
+    int filter_taps_h;
     bool memcpy_first;
     bool memcpy_last;
     bool memcpy_out;
@@ -207,7 +207,7 @@ static int op_pass_setup(const SwsFrame *out, const 
SwsFrame *in,
         size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
         size_t safe_blocks_in;
         if (exec->in_offset_x) {
-            size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
+            size_t filter_size = pixel_bytes(p->filter_taps_h, 
p->pixel_bits_in,
                                              AV_ROUND_UP);
             safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
                                                 safe_bytes - filter_size,
@@ -271,7 +271,7 @@ static int op_pass_setup(const SwsFrame *out, const 
SwsFrame *in,
     if (exec->in_offset_x) {
         p->tail_off_in  = exec->in_offset_x[safe_width];
         p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
-        p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, 
AV_ROUND_UP);
+        p->tail_size_in += pixel_bytes(p->filter_taps_h, p->pixel_bits_in, 
AV_ROUND_UP);
     } else {
         p->tail_off_in  = pixel_bytes(safe_width, p->pixel_bits_in, 
AV_ROUND_DOWN);
         p->tail_size_in = pixel_bytes(tail_size,  p->pixel_bits_in, 
AV_ROUND_UP);
@@ -524,6 +524,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend 
*backend,
     const SwsFilterWeights *filter = read->rw.kernel;
     if (read->rw.filter == SWS_OP_FILTER_V) {
         p->offsets_y = av_refstruct_ref(filter->offsets);
+        av_assert0(!comp->extra_taps);
 
         /* Compute relative pointer bumps for each output line */
         int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
@@ -564,7 +565,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend 
*backend,
         for (int x = filter->dst_size; x < pixels; x++)
             offset[x] = offset[filter->dst_size - 1];
         p->exec_base.block_size_in = 0; /* ptr does not advance */
-        p->filter_size = filter->filter_size;
+        p->filter_taps_h = filter->filter_size + comp->extra_taps;
     }
 
     ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h
index be771da9a9..0ceb92b0c6 100644
--- a/libswscale/ops_dispatch.h
+++ b/libswscale/ops_dispatch.h
@@ -119,6 +119,7 @@ typedef struct SwsCompiledOp {
     int block_size;  /* number of pixels processed per iteration */
     int over_read;   /* implementation over-reads input by this many bytes */
     int over_write;  /* implementation over-writes output by this many bytes */
+    int extra_taps;  /* extra filter taps needed for read */
 
     /* Arbitrary private data */
     void *priv;
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 20369652cc..e5dd17cbd0 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -423,7 +423,7 @@ static int setup_filter_h(const SwsImplParams *params, 
SwsImplResult *out)
     out->priv.ptr = weights.ptr;
     out->priv.uptr[1] = aligned_size;
     out->free = ff_op_priv_free;
-    out->over_read = (aligned_size - filter_size) * pixel_size;
+    out->extra_taps = aligned_size - filter_size;
     return 0;
 }
 
@@ -455,7 +455,6 @@ static int setup_filter_4x4_h(const SwsImplParams *params, 
SwsImplResult *out)
 {
     const SwsOp *op = params->op;
     const SwsFilterWeights *filter = op->rw.kernel;
-    const int pixel_size = ff_sws_pixel_type_size(op->type);
     const int sizeof_weights = hscale_sizeof_weight(op);
     const int block_size = params->table->block_size;
     const int taps_align = 16 / sizeof_weights; /* taps per iteration (XMM) */
@@ -506,7 +505,7 @@ static int setup_filter_4x4_h(const SwsImplParams *params, 
SwsImplResult *out)
     out->priv.ptr = weights.ptr;
     out->priv.uptr[1] = aligned_size * sizeof_weights;
     out->free = ff_op_priv_free;
-    out->over_read = (aligned_size - filter_size) * pixel_size;
+    out->extra_taps = aligned_size - filter_size;
     return 0;
 }
 
-- 
2.52.0


>From e0a1501a34dabd87a941d387238f8151c4ba3f39 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Wed, 20 May 2026 17:53:56 +0200
Subject: [PATCH 2/2] swscale/ops_dispatch: prevent float over-read when
 horizontal filtering

The code made the fundamental assumption that over-read into the padding
bytes is okay to do; because the most that can happen is that those pixel
values end up corrupted, which doesn't affect any adjacent pixels.

However, this is not true for SWS_OP_FILTER_H, because this operation
fundamentally mixes together horizontal pixels. Normally, this was fine,
because the filter weights for those pixels are set to 0, and 0 * x = 0.

However, that is not true for floating point inputs, which can contain
Infinity; and 0 * Infinity = NaN, thus corrupting the entire pixel.

Solve it by specifically preventing over-read when it would be unsafe.

Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_dispatch.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index ab56bacaf1..1f2d512469 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -182,6 +182,7 @@ static int op_pass_setup(const SwsFrame *out, const 
SwsFrame *in,
 {
     const AVPixFmtDescriptor *indesc  = av_pix_fmt_desc_get(in->format);
     const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
+    const bool float_in = indesc->flags & AV_PIX_FMT_FLAG_FLOAT;
 
     SwsOpPass *p = pass->priv;
     SwsOpExec *exec = &p->exec_base;
@@ -204,8 +205,17 @@ static int op_pass_setup(const SwsFrame *out, const 
SwsFrame *in,
         int chroma = idx == 1 || idx == 2;
         int sub_x  = chroma ? indesc->log2_chroma_w : 0;
         int sub_y  = chroma ? indesc->log2_chroma_h : 0;
-        size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
+        size_t safe_bytes;
         size_t safe_blocks_in;
+
+        if (p->filter_taps_h && float_in) {
+            /* Floating point inputs may contain NaN in the padding */
+            safe_bytes = pixel_bytes(pass->width, p->pixel_bits_in, 
AV_ROUND_DOWN);
+        } else {
+            /* In all other cases, we can safely over-read into the stride */
+            safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
+        }
+
         if (exec->in_offset_x) {
             size_t filter_size = pixel_bytes(p->filter_taps_h, 
p->pixel_bits_in,
                                              AV_ROUND_UP);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to