PR #21264 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21264
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21264.patch

Depends on #21263. Supersedes #21208.

This is an IMHO cleaner reimplementation of the same underlying principle of 
letting the format code attach metadata to the op list. Instead of a 
`SWS_OP_ASSUME` pseudo-op, this version instead attaches it directly to the 
`SwsComps` field of `SWS_OP_READ`, and changes the semantics of 
`ff_sws_op_list_update_comps()` to respect that information rather than nuking 
it every time.

Crucially, this version ensures that all available information survives and 
persists all the way to the backend; allowing those to make smarter decisions 
about what to do. (The current backends do not care, but @ramiro wants it for 
his asmjit)

Also fixes a number of bugs en passent.


>From d93ed0e7ada69c32541a0688d50527cb62a68637 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 15:31:01 +0100
Subject: [PATCH 01/10] swscale/ops_internal: fix ff_sws_pack_op_decode()

This function was assuming that the bits are MSB-aligned, but they are
LSB-aligned in both practice (and in the actual backend).

Also update the documentation of SwsPackOp to make this clearer.

Fixes an incorrect omission of a clamp after decoding e.g. rgb4, since
the max value range was incorrectly determined as 0 as a result of unpacking
the MSB bits instead of the LSB bits:

 bgr4 -> gray:
   [ u8 XXXX -> +XXX] SWS_OP_READ         : 1 elem(s) packed >> 1
   [ u8 .XXX -> +++X] SWS_OP_UNPACK       : {1 2 1 0}
   [ u8 ...X -> +++X] SWS_OP_SWIZZLE      : 2103
   [ u8 ...X -> +++X] SWS_OP_CONVERT      : u8 -> f32
   [f32 ...X -> .++X] SWS_OP_LINEAR       : dot3 [...]
   [f32 .XXX -> .++X] SWS_OP_DITHER       : 16x16 matrix + {0 3 2 5}
+  [f32 .XXX -> .++X] SWS_OP_MIN          : x <= {255 _ _ _}
   [f32 .XXX -> +++X] SWS_OP_CONVERT      : f32 -> u8
   [ u8 .XXX -> +++X] SWS_OP_WRITE        : 1 elem(s) planar >> 0
     (X = unused, + = exact, 0 = zero)
---
 libswscale/ops.h            | 4 ++++
 libswscale/ops_internal.h   | 4 +++-
 tests/ref/fate/sws-ops-list | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libswscale/ops.h b/libswscale/ops.h
index 6392d0ffdf..6fc7e60a02 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -109,6 +109,10 @@ typedef struct SwsReadWriteOp {
 } SwsReadWriteOp;
 
 typedef struct SwsPackOp {
+    /**
+     * Packed bits are assumed to be LSB-aligned within the underlying
+     * integer type; i.e. (msb) 0 ... X Y Z W (lsb).
+     */
     uint8_t pattern[4]; /* bit depth pattern, from MSB to LSB */
 } SwsPackOp;
 
diff --git a/libswscale/ops_internal.h b/libswscale/ops_internal.h
index 0071f78558..ba4c9b39da 100644
--- a/libswscale/ops_internal.h
+++ b/libswscale/ops_internal.h
@@ -39,7 +39,9 @@ static inline AVRational ff_sws_pixel_expand(SwsPixelType 
from, SwsPixelType to)
 
 static inline void ff_sws_pack_op_decode(const SwsOp *op, uint64_t mask[4], 
int shift[4])
 {
-    const int size = ff_sws_pixel_type_size(op->type) * 8;
+    int size = 0;
+    for (int i = 0; i < 4; i++)
+        size += op->pack.pattern[i];
     for (int i = 0; i < 4; i++) {
         const int bits = op->pack.pattern[i];
         mask[i] = (UINT64_C(1) << bits) - 1;
diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list
index b49f944794..a7d6149d8b 100644
--- a/tests/ref/fate/sws-ops-list
+++ b/tests/ref/fate/sws-ops-list
@@ -1 +1 @@
-e910ff7ceaeb64bfdbac3f652b67403f
+ef1dd10af970984495f6008e43d0fe1b
-- 
2.49.1


>From dc2f27b3f68c12e8ce3dd8315d98ad4ecdf307d0 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 14:07:45 +0100
Subject: [PATCH 02/10] swscale/ops: move ff_sws_op_list_update_comps() to
 ops.c

I think this is ultimately a better home, since the semantics of this are
not really tied to optimization itself; and because I want to make it an
explicitly suported part of the user-facing API (rather than just an
internal-use field).

The secondary motivating reason here is that I intend to use internal
helpers of `ops.c` inside the next commit. (Though this is a weak reason
on its own, and not sufficient to justify this move by itself.)
---
 libswscale/ops.c           | 239 +++++++++++++++++++++++++++++++++++++
 libswscale/ops_optimizer.c | 239 -------------------------------------
 2 files changed, 239 insertions(+), 239 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index f84416feed..5b08b743bf 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -211,6 +211,245 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
     av_unreachable("Invalid operation type!");
 }
 
+/* merge_comp_flags() forms a monoid with flags_identity as the null element */
+static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
+static unsigned merge_comp_flags(unsigned a, unsigned b)
+{
+    const unsigned flags_or  = SWS_COMP_GARBAGE;
+    const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
+    return ((a & b) & flags_and) | ((a | b) & flags_or);
+}
+
+/* Infer + propagate known information about components */
+void ff_sws_op_list_update_comps(SwsOpList *ops)
+{
+    SwsComps next = { .unused = {true, true, true, true} };
+    SwsComps prev = { .flags = {
+        SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
+    }};
+
+    /* Forwards pass, propagates knowledge about the incoming pixel values */
+    for (int n = 0; n < ops->num_ops; n++) {
+        SwsOp *op = &ops->ops[n];
+
+        /* Prefill min/max values automatically; may have to be fixed in
+         * special cases */
+        memcpy(op->comps.min, prev.min, sizeof(prev.min));
+        memcpy(op->comps.max, prev.max, sizeof(prev.max));
+
+        if (op->op != SWS_OP_SWAP_BYTES) {
+            ff_sws_apply_op_q(op, op->comps.min);
+            ff_sws_apply_op_q(op, op->comps.max);
+        }
+
+        switch (op->op) {
+        case SWS_OP_READ:
+            for (int i = 0; i < op->rw.elems; i++) {
+                if (ff_sws_pixel_type_is_int(op->type)) {
+                    int bits = 8 * ff_sws_pixel_type_size(op->type) >> 
op->rw.frac;
+                    if (!op->rw.packed && ops->src.desc) {
+                        /* Use legal value range from pixdesc if available;
+                         * we don't need to do this for packed formats because
+                         * non-byte-aligned packed formats will necessarily go
+                         * through SWS_OP_UNPACK anyway */
+                        for (int c = 0; c < 4; c++) {
+                            if (ops->src.desc->comp[c].plane == i) {
+                                bits = ops->src.desc->comp[c].depth;
+                                break;
+                            }
+                        }
+                    }
+
+                    op->comps.flags[i] = SWS_COMP_EXACT;
+                    op->comps.min[i] = Q(0);
+                    op->comps.max[i] = Q((1ULL << bits) - 1);
+                }
+            }
+            for (int i = op->rw.elems; i < 4; i++)
+                op->comps.flags[i] = prev.flags[i];
+            break;
+        case SWS_OP_WRITE:
+            for (int i = 0; i < op->rw.elems; i++)
+                av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
+            /* fall through */
+        case SWS_OP_SWAP_BYTES:
+        case SWS_OP_LSHIFT:
+        case SWS_OP_RSHIFT:
+        case SWS_OP_MIN:
+        case SWS_OP_MAX:
+            /* Linearly propagate flags per component */
+            for (int i = 0; i < 4; i++)
+                op->comps.flags[i] = prev.flags[i];
+            break;
+        case SWS_OP_DITHER:
+            /* Strip zero flag because of the nonzero dithering offset */
+            for (int i = 0; i < 4; i++)
+                op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
+            break;
+        case SWS_OP_UNPACK:
+            for (int i = 0; i < 4; i++) {
+                if (op->pack.pattern[i])
+                    op->comps.flags[i] = prev.flags[0];
+                else
+                    op->comps.flags[i] = SWS_COMP_GARBAGE;
+            }
+            break;
+        case SWS_OP_PACK: {
+            unsigned flags = flags_identity;
+            for (int i = 0; i < 4; i++) {
+                if (op->pack.pattern[i])
+                    flags = merge_comp_flags(flags, prev.flags[i]);
+                if (i > 0) /* clear remaining comps for sanity */
+                    op->comps.flags[i] = SWS_COMP_GARBAGE;
+            }
+            op->comps.flags[0] = flags;
+            break;
+        }
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (op->c.q4[i].den) {
+                    if (op->c.q4[i].num == 0) {
+                        op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT;
+                    } else if (op->c.q4[i].den == 1) {
+                        op->comps.flags[i] = SWS_COMP_EXACT;
+                    }
+                } else {
+                    op->comps.flags[i] = prev.flags[i];
+                }
+            }
+            break;
+        case SWS_OP_SWIZZLE:
+            for (int i = 0; i < 4; i++)
+                op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
+            break;
+        case SWS_OP_CONVERT:
+            for (int i = 0; i < 4; i++) {
+                op->comps.flags[i] = prev.flags[i];
+                if (ff_sws_pixel_type_is_int(op->convert.to))
+                    op->comps.flags[i] |= SWS_COMP_EXACT;
+            }
+            break;
+        case SWS_OP_LINEAR:
+            for (int i = 0; i < 4; i++) {
+                unsigned flags = flags_identity;
+                AVRational min = Q(0), max = Q(0);
+                for (int j = 0; j < 4; j++) {
+                    const AVRational k = op->lin.m[i][j];
+                    AVRational mink = av_mul_q(prev.min[j], k);
+                    AVRational maxk = av_mul_q(prev.max[j], k);
+                    if (k.num) {
+                        flags = merge_comp_flags(flags, prev.flags[j]);
+                        if (k.den != 1) /* fractional coefficient */
+                            flags &= ~SWS_COMP_EXACT;
+                        if (k.num < 0)
+                            FFSWAP(AVRational, mink, maxk);
+                        min = av_add_q(min, mink);
+                        max = av_add_q(max, maxk);
+                    }
+                }
+                if (op->lin.m[i][4].num) { /* nonzero offset */
+                    flags &= ~SWS_COMP_ZERO;
+                    if (op->lin.m[i][4].den != 1) /* fractional offset */
+                        flags &= ~SWS_COMP_EXACT;
+                    min = av_add_q(min, op->lin.m[i][4]);
+                    max = av_add_q(max, op->lin.m[i][4]);
+                }
+                op->comps.flags[i] = flags;
+                op->comps.min[i] = min;
+                op->comps.max[i] = max;
+            }
+            break;
+        case SWS_OP_SCALE:
+            for (int i = 0; i < 4; i++) {
+                op->comps.flags[i] = prev.flags[i];
+                if (op->c.q.den != 1) /* fractional scale */
+                    op->comps.flags[i] &= ~SWS_COMP_EXACT;
+                if (op->c.q.num < 0)
+                    FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
+            }
+            break;
+
+        case SWS_OP_INVALID:
+        case SWS_OP_TYPE_NB:
+            av_unreachable("Invalid operation type!");
+        }
+
+        prev = op->comps;
+    }
+
+    /* Backwards pass, solves for component dependencies */
+    for (int n = ops->num_ops - 1; n >= 0; n--) {
+        SwsOp *op = &ops->ops[n];
+
+        switch (op->op) {
+        case SWS_OP_READ:
+        case SWS_OP_WRITE:
+            for (int i = 0; i < op->rw.elems; i++)
+                op->comps.unused[i] = op->op == SWS_OP_READ;
+            for (int i = op->rw.elems; i < 4; i++)
+                op->comps.unused[i] = next.unused[i];
+            break;
+        case SWS_OP_SWAP_BYTES:
+        case SWS_OP_LSHIFT:
+        case SWS_OP_RSHIFT:
+        case SWS_OP_CONVERT:
+        case SWS_OP_DITHER:
+        case SWS_OP_MIN:
+        case SWS_OP_MAX:
+        case SWS_OP_SCALE:
+            for (int i = 0; i < 4; i++)
+                op->comps.unused[i] = next.unused[i];
+            break;
+        case SWS_OP_UNPACK: {
+            bool unused = true;
+            for (int i = 0; i < 4; i++) {
+                if (op->pack.pattern[i])
+                    unused &= next.unused[i];
+                op->comps.unused[i] = i > 0;
+            }
+            op->comps.unused[0] = unused;
+            break;
+        }
+        case SWS_OP_PACK:
+            for (int i = 0; i < 4; i++) {
+                if (op->pack.pattern[i])
+                    op->comps.unused[i] = next.unused[0];
+                else
+                    op->comps.unused[i] = true;
+            }
+            break;
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (op->c.q4[i].den)
+                    op->comps.unused[i] = true;
+                else
+                    op->comps.unused[i] = next.unused[i];
+            }
+            break;
+        case SWS_OP_SWIZZLE: {
+            bool unused[4] = { true, true, true, true };
+            for (int i = 0; i < 4; i++)
+                unused[op->swizzle.in[i]] &= next.unused[i];
+            for (int i = 0; i < 4; i++)
+                op->comps.unused[i] = unused[i];
+            break;
+        }
+        case SWS_OP_LINEAR:
+            for (int j = 0; j < 4; j++) {
+                bool unused = true;
+                for (int i = 0; i < 4; i++) {
+                    if (op->lin.m[i][j].num)
+                        unused &= next.unused[i];
+                }
+                op->comps.unused[j] = unused;
+            }
+            break;
+        }
+
+        next = op->comps;
+    }
+}
+
 static void op_uninit(SwsOp *op)
 {
     switch (op->op) {
diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index 9d668fee74..11ee40e268 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -146,245 +146,6 @@ static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
     return false;
 }
 
-/* merge_comp_flags() forms a monoid with flags_identity as the null element */
-static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT;
-static unsigned merge_comp_flags(unsigned a, unsigned b)
-{
-    const unsigned flags_or  = SWS_COMP_GARBAGE;
-    const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT;
-    return ((a & b) & flags_and) | ((a | b) & flags_or);
-}
-
-/* Infer + propagate known information about components */
-void ff_sws_op_list_update_comps(SwsOpList *ops)
-{
-    SwsComps next = { .unused = {true, true, true, true} };
-    SwsComps prev = { .flags = {
-        SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE,
-    }};
-
-    /* Forwards pass, propagates knowledge about the incoming pixel values */
-    for (int n = 0; n < ops->num_ops; n++) {
-        SwsOp *op = &ops->ops[n];
-
-        /* Prefill min/max values automatically; may have to be fixed in
-         * special cases */
-        memcpy(op->comps.min, prev.min, sizeof(prev.min));
-        memcpy(op->comps.max, prev.max, sizeof(prev.max));
-
-        if (op->op != SWS_OP_SWAP_BYTES) {
-            ff_sws_apply_op_q(op, op->comps.min);
-            ff_sws_apply_op_q(op, op->comps.max);
-        }
-
-        switch (op->op) {
-        case SWS_OP_READ:
-            for (int i = 0; i < op->rw.elems; i++) {
-                if (ff_sws_pixel_type_is_int(op->type)) {
-                    int bits = 8 * ff_sws_pixel_type_size(op->type) >> 
op->rw.frac;
-                    if (!op->rw.packed && ops->src.desc) {
-                        /* Use legal value range from pixdesc if available;
-                         * we don't need to do this for packed formats because
-                         * non-byte-aligned packed formats will necessarily go
-                         * through SWS_OP_UNPACK anyway */
-                        for (int c = 0; c < 4; c++) {
-                            if (ops->src.desc->comp[c].plane == i) {
-                                bits = ops->src.desc->comp[c].depth;
-                                break;
-                            }
-                        }
-                    }
-
-                    op->comps.flags[i] = SWS_COMP_EXACT;
-                    op->comps.min[i] = Q(0);
-                    op->comps.max[i] = Q((1ULL << bits) - 1);
-                }
-            }
-            for (int i = op->rw.elems; i < 4; i++)
-                op->comps.flags[i] = prev.flags[i];
-            break;
-        case SWS_OP_WRITE:
-            for (int i = 0; i < op->rw.elems; i++)
-                av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
-            /* fall through */
-        case SWS_OP_SWAP_BYTES:
-        case SWS_OP_LSHIFT:
-        case SWS_OP_RSHIFT:
-        case SWS_OP_MIN:
-        case SWS_OP_MAX:
-            /* Linearly propagate flags per component */
-            for (int i = 0; i < 4; i++)
-                op->comps.flags[i] = prev.flags[i];
-            break;
-        case SWS_OP_DITHER:
-            /* Strip zero flag because of the nonzero dithering offset */
-            for (int i = 0; i < 4; i++)
-                op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO;
-            break;
-        case SWS_OP_UNPACK:
-            for (int i = 0; i < 4; i++) {
-                if (op->pack.pattern[i])
-                    op->comps.flags[i] = prev.flags[0];
-                else
-                    op->comps.flags[i] = SWS_COMP_GARBAGE;
-            }
-            break;
-        case SWS_OP_PACK: {
-            unsigned flags = flags_identity;
-            for (int i = 0; i < 4; i++) {
-                if (op->pack.pattern[i])
-                    flags = merge_comp_flags(flags, prev.flags[i]);
-                if (i > 0) /* clear remaining comps for sanity */
-                    op->comps.flags[i] = SWS_COMP_GARBAGE;
-            }
-            op->comps.flags[0] = flags;
-            break;
-        }
-        case SWS_OP_CLEAR:
-            for (int i = 0; i < 4; i++) {
-                if (op->c.q4[i].den) {
-                    if (op->c.q4[i].num == 0) {
-                        op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT;
-                    } else if (op->c.q4[i].den == 1) {
-                        op->comps.flags[i] = SWS_COMP_EXACT;
-                    }
-                } else {
-                    op->comps.flags[i] = prev.flags[i];
-                }
-            }
-            break;
-        case SWS_OP_SWIZZLE:
-            for (int i = 0; i < 4; i++)
-                op->comps.flags[i] = prev.flags[op->swizzle.in[i]];
-            break;
-        case SWS_OP_CONVERT:
-            for (int i = 0; i < 4; i++) {
-                op->comps.flags[i] = prev.flags[i];
-                if (ff_sws_pixel_type_is_int(op->convert.to))
-                    op->comps.flags[i] |= SWS_COMP_EXACT;
-            }
-            break;
-        case SWS_OP_LINEAR:
-            for (int i = 0; i < 4; i++) {
-                unsigned flags = flags_identity;
-                AVRational min = Q(0), max = Q(0);
-                for (int j = 0; j < 4; j++) {
-                    const AVRational k = op->lin.m[i][j];
-                    AVRational mink = av_mul_q(prev.min[j], k);
-                    AVRational maxk = av_mul_q(prev.max[j], k);
-                    if (k.num) {
-                        flags = merge_comp_flags(flags, prev.flags[j]);
-                        if (k.den != 1) /* fractional coefficient */
-                            flags &= ~SWS_COMP_EXACT;
-                        if (k.num < 0)
-                            FFSWAP(AVRational, mink, maxk);
-                        min = av_add_q(min, mink);
-                        max = av_add_q(max, maxk);
-                    }
-                }
-                if (op->lin.m[i][4].num) { /* nonzero offset */
-                    flags &= ~SWS_COMP_ZERO;
-                    if (op->lin.m[i][4].den != 1) /* fractional offset */
-                        flags &= ~SWS_COMP_EXACT;
-                    min = av_add_q(min, op->lin.m[i][4]);
-                    max = av_add_q(max, op->lin.m[i][4]);
-                }
-                op->comps.flags[i] = flags;
-                op->comps.min[i] = min;
-                op->comps.max[i] = max;
-            }
-            break;
-        case SWS_OP_SCALE:
-            for (int i = 0; i < 4; i++) {
-                op->comps.flags[i] = prev.flags[i];
-                if (op->c.q.den != 1) /* fractional scale */
-                    op->comps.flags[i] &= ~SWS_COMP_EXACT;
-                if (op->c.q.num < 0)
-                    FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]);
-            }
-            break;
-
-        case SWS_OP_INVALID:
-        case SWS_OP_TYPE_NB:
-            av_unreachable("Invalid operation type!");
-        }
-
-        prev = op->comps;
-    }
-
-    /* Backwards pass, solves for component dependencies */
-    for (int n = ops->num_ops - 1; n >= 0; n--) {
-        SwsOp *op = &ops->ops[n];
-
-        switch (op->op) {
-        case SWS_OP_READ:
-        case SWS_OP_WRITE:
-            for (int i = 0; i < op->rw.elems; i++)
-                op->comps.unused[i] = op->op == SWS_OP_READ;
-            for (int i = op->rw.elems; i < 4; i++)
-                op->comps.unused[i] = next.unused[i];
-            break;
-        case SWS_OP_SWAP_BYTES:
-        case SWS_OP_LSHIFT:
-        case SWS_OP_RSHIFT:
-        case SWS_OP_CONVERT:
-        case SWS_OP_DITHER:
-        case SWS_OP_MIN:
-        case SWS_OP_MAX:
-        case SWS_OP_SCALE:
-            for (int i = 0; i < 4; i++)
-                op->comps.unused[i] = next.unused[i];
-            break;
-        case SWS_OP_UNPACK: {
-            bool unused = true;
-            for (int i = 0; i < 4; i++) {
-                if (op->pack.pattern[i])
-                    unused &= next.unused[i];
-                op->comps.unused[i] = i > 0;
-            }
-            op->comps.unused[0] = unused;
-            break;
-        }
-        case SWS_OP_PACK:
-            for (int i = 0; i < 4; i++) {
-                if (op->pack.pattern[i])
-                    op->comps.unused[i] = next.unused[0];
-                else
-                    op->comps.unused[i] = true;
-            }
-            break;
-        case SWS_OP_CLEAR:
-            for (int i = 0; i < 4; i++) {
-                if (op->c.q4[i].den)
-                    op->comps.unused[i] = true;
-                else
-                    op->comps.unused[i] = next.unused[i];
-            }
-            break;
-        case SWS_OP_SWIZZLE: {
-            bool unused[4] = { true, true, true, true };
-            for (int i = 0; i < 4; i++)
-                unused[op->swizzle.in[i]] &= next.unused[i];
-            for (int i = 0; i < 4; i++)
-                op->comps.unused[i] = unused[i];
-            break;
-        }
-        case SWS_OP_LINEAR:
-            for (int j = 0; j < 4; j++) {
-                bool unused = true;
-                for (int i = 0; i < 4; i++) {
-                    if (op->lin.m[i][j].num)
-                        unused &= next.unused[i];
-                }
-                op->comps.unused[j] = unused;
-            }
-            break;
-        }
-
-        next = op->comps;
-    }
-}
-
 /* returns log2(x) only if x is a power of two, or 0 otherwise */
 static int exact_log2(const int x)
 {
-- 
2.49.1


>From 58fc6803333a13fea92efefe6ecd0529de389c5c Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 14:16:50 +0100
Subject: [PATCH 03/10] swscale/ops_optimizer: don't strip SwsComps from
 SWS_OP_READ

The current behavior of assuming the value range implicitly on SWS_OP_READ
has a number of serious drawbacks and shortcomings:

- It ignored the effects of SWS_OP_RSHIFT, such as for p010 and related
  MSB-aligned formats. (This is actually a bug)

- It adds a needless dependency on the "purely informative" src/dst fields
  inside SwsOpList.

- It is difficult to reason about when acted upon by SWS_OP_SWAP_BYTES, and
  the existing hack of simply ignoring SWAP_BYTES on the value range is not
  a very good solution here.

Instead, we need a more principled way for the op list generating code
to communicate extra metadata about the operations read to the optimizer.

I think the simplest way of doing this is to allow the SwsComps field attached
to SWS_OP_READ to carry additional, user-provided information about the values
read.

This requires changing ff_sws_op_list_update_comps() slightly to not completely
overwrite SwsComps on SWS_OP_READ, but instead merge the implicit information
with the explictly provided one.
---
 libswscale/ops.c | 30 ++++++++++++++++++------------
 libswscale/ops.h | 17 ++++++++++++++++-
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 5b08b743bf..f2e9767956 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -232,14 +232,15 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
     for (int n = 0; n < ops->num_ops; n++) {
         SwsOp *op = &ops->ops[n];
 
-        /* Prefill min/max values automatically; may have to be fixed in
-         * special cases */
-        memcpy(op->comps.min, prev.min, sizeof(prev.min));
-        memcpy(op->comps.max, prev.max, sizeof(prev.max));
-
-        if (op->op != SWS_OP_SWAP_BYTES) {
-            ff_sws_apply_op_q(op, op->comps.min);
-            ff_sws_apply_op_q(op, op->comps.max);
+        if (op->op != SWS_OP_READ) {
+            /* Prefill min/max values automatically; may have to be fixed in
+            * special cases */
+            memcpy(op->comps.min, prev.min, sizeof(prev.min));
+            memcpy(op->comps.max, prev.max, sizeof(prev.max));
+            if (op->op != SWS_OP_SWAP_BYTES) {
+                ff_sws_apply_op_q(op, op->comps.min);
+                ff_sws_apply_op_q(op, op->comps.max);
+            }
         }
 
         switch (op->op) {
@@ -260,13 +261,18 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
                         }
                     }
 
-                    op->comps.flags[i] = SWS_COMP_EXACT;
-                    op->comps.min[i] = Q(0);
-                    op->comps.max[i] = Q((1ULL << bits) - 1);
+                    op->comps.flags[i] |= SWS_COMP_EXACT;
+                    op->comps.min[i] = av_max_q(Q(0), op->comps.min[i]);
+                    op->comps.max[i] = av_min_q(Q((1ULL << bits) - 1), 
op->comps.max[i]);
                 }
             }
-            for (int i = op->rw.elems; i < 4; i++)
+
+            /* Explicitly strip flags and min/max range data for unread comps 
*/
+            for (int i = op->rw.elems; i < 4; i++) {
                 op->comps.flags[i] = prev.flags[i];
+                op->comps.min[i]   = prev.min[i];
+                op->comps.max[i]   = prev.max[i];
+            }
             break;
         case SWS_OP_WRITE:
             for (int i = 0; i < op->rw.elems; i++)
diff --git a/libswscale/ops.h b/libswscale/ops.h
index 6fc7e60a02..f49bbcc601 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -195,7 +195,22 @@ typedef struct SwsOp {
         SwsConst        c;
     };
 
-    /* For use internal use inside ff_sws_*() functions */
+    /**
+     * Metadata about the operation's input/output components.
+     *
+     * For SWS_OP_READ, this is informative; and lets the optimizer know
+     * additional information about the value range and/or pixel data to 
expect.
+     * The default value of {0} is safe to pass in the case that no additional
+     * information is known. Extra components past the SWS_OP_READ boundary are
+     * always marked as unused/garbage.
+     *
+     * For every other operation, this metadata is discarded and regenerated
+     * automatically by `ff_sws_op_list_update_comps()`.
+     *
+     * Note that backends will only ever see optimized operation lists, so
+     * they may rely on the presence and accuracy of this metadata for all
+     * operations.
+     */
     SwsComps comps;
 } SwsOp;
 
-- 
2.49.1


>From 0adebde9a439f1ad2442c0079f11be22cd3922ac Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 14:29:52 +0100
Subject: [PATCH 04/10] swscale/format: add pixel range metadata to SWS_OP_READ

---
 libswscale/format.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/libswscale/format.c b/libswscale/format.c
index 9e1121f484..589ae71b71 100644
--- a/libswscale/format.c
+++ b/libswscale/format.c
@@ -896,16 +896,29 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum 
AVPixelFormat fmt)
     SwsReadWriteOp rw_op;
     SwsSwizzleOp swizzle;
     SwsPackOp unpack;
+    SwsComps comps = {0};
     int shift;
 
     RET(fmt_analyze(fmt, &rw_op, &unpack, &swizzle, &shift,
                     &pixel_type, &raw_type));
 
+    /* Generate value range information for simple unpacked formats */
+    if (!(desc->flags & AV_PIX_FMT_FLAG_FLOAT) && !unpack.pattern[0]) {
+        for (int c = 0; c < desc->nb_components; c++) {
+            const int bits = desc->comp[c].depth + shift;
+            const int idx  = swizzle.in[c];
+            comps.min[idx] = Q0;
+            if (bits < 32) /* FIXME: AVRational is limited to INT_MAX */
+                comps.max[idx] = Q((1ULL << bits) - 1);
+        }
+    }
+
     /* TODO: handle subsampled or semipacked input formats */
     RET(ff_sws_op_list_append(ops, &(SwsOp) {
-        .op   = SWS_OP_READ,
-        .type = raw_type,
-        .rw   = rw_op,
+        .op    = SWS_OP_READ,
+        .type  = raw_type,
+        .rw    = rw_op,
+        .comps = comps,
     }));
 
     if ((desc->flags & AV_PIX_FMT_FLAG_BE) != NATIVE_ENDIAN_FLAG) {
-- 
2.49.1


>From cb41401eb8a4e62fc6b8029cb861564bfeab67f7 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 14:33:47 +0100
Subject: [PATCH 05/10] swscale/optimizer: remove broken value range assumption
 hack

This information is now pre-filled automatically for SWS_OP_READ when
relevant.

yuv444p10msbbe -> rgb24:
   [u16 XXXX -> +++X] SWS_OP_READ         : 3 elem(s) planar >> 0
   [u16 ...X -> +++X] SWS_OP_SWAP_BYTES
   [u16 ...X -> +++X] SWS_OP_RSHIFT       : >> 6
   [u16 ...X -> +++X] SWS_OP_CONVERT      : u16 -> f32
   [f32 ...X -> ...X] SWS_OP_LINEAR       : matrix3+off3 [...]
   [f32 ...X -> ...X] SWS_OP_DITHER       : 16x16 matrix + {0 3 2 5}
   [f32 ...X -> ...X] SWS_OP_MAX          : {0 0 0 0} <= x
+  [f32 ...X -> ...X] SWS_OP_MIN          : x <= {255 255 255 _}
   [f32 ...X -> +++X] SWS_OP_CONVERT      : f32 -> u8
   [ u8 ...X -> +++X] SWS_OP_WRITE        : 3 elem(s) packed >> 0
    (X = unused, + = exact, 0 = zero)

(This clamp is needed and was incorrectly optimized away before, because the
`SWS_OP_RSHIFT` incorrectly distorted the value range assertion)
---
 libswscale/ops.c            | 13 -------------
 tests/ref/fate/sws-ops-list |  2 +-
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index f2e9767956..25bce3c59f 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -248,19 +248,6 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
             for (int i = 0; i < op->rw.elems; i++) {
                 if (ff_sws_pixel_type_is_int(op->type)) {
                     int bits = 8 * ff_sws_pixel_type_size(op->type) >> 
op->rw.frac;
-                    if (!op->rw.packed && ops->src.desc) {
-                        /* Use legal value range from pixdesc if available;
-                         * we don't need to do this for packed formats because
-                         * non-byte-aligned packed formats will necessarily go
-                         * through SWS_OP_UNPACK anyway */
-                        for (int c = 0; c < 4; c++) {
-                            if (ops->src.desc->comp[c].plane == i) {
-                                bits = ops->src.desc->comp[c].depth;
-                                break;
-                            }
-                        }
-                    }
-
                     op->comps.flags[i] |= SWS_COMP_EXACT;
                     op->comps.min[i] = av_max_q(Q(0), op->comps.min[i]);
                     op->comps.max[i] = av_min_q(Q((1ULL << bits) - 1), 
op->comps.max[i]);
diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list
index a7d6149d8b..1ec4e5af79 100644
--- a/tests/ref/fate/sws-ops-list
+++ b/tests/ref/fate/sws-ops-list
@@ -1 +1 @@
-ef1dd10af970984495f6008e43d0fe1b
+08b3e768ae2f518b73897ab5bad459e9
-- 
2.49.1


>From c70c1101751938df21b6f91fe76e9e115e86f94a Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 13:51:54 +0100
Subject: [PATCH 06/10] swscale/ops: add SWS_COMP_SWAPPED

This flag keeps track of whether a pixel is currently byte-swapped or
not. Not needed by current backends, but informative and useful for
catching potential endianness errors.

Updates a lot of FATE tests with a cosmetic diff like this:

 rgb24 -> gray16be:
   [ u8 XXXX -> +++X] SWS_OP_READ         : 3 elem(s) packed >> 0
   [ u8 ...X -> +++X] SWS_OP_CONVERT      : u8 -> f32
   [f32 ...X -> .++X] SWS_OP_LINEAR       : dot3 [...]
   [f32 .XXX -> +++X] SWS_OP_CONVERT      : f32 -> u16
-  [u16 .XXX -> +++X] SWS_OP_SWAP_BYTES
-  [u16 .XXX -> +++X] SWS_OP_WRITE        : 1 elem(s) planar >> 0
+  [u16 .XXX -> zzzX] SWS_OP_SWAP_BYTES
+  [u16 .XXX -> zzzX] SWS_OP_WRITE        : 1 elem(s) planar >> 0
     (X = unused, + = exact, 0 = zero)

(The choice of `z` to represent swapped integers is arbitrary, but I think
it's visually evocative and distinct from the other symbols)
---
 libswscale/format.c         | 8 +++++++-
 libswscale/ops.c            | 7 ++++++-
 libswscale/ops.h            | 1 +
 tests/ref/fate/sws-ops-list | 2 +-
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/libswscale/format.c b/libswscale/format.c
index 589ae71b71..90c9404518 100644
--- a/libswscale/format.c
+++ b/libswscale/format.c
@@ -913,6 +913,12 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum 
AVPixelFormat fmt)
         }
     }
 
+    const int swapped = (desc->flags & AV_PIX_FMT_FLAG_BE) != 
NATIVE_ENDIAN_FLAG;
+    if (swapped) {
+        for (int i = 0; i < 4; i++)
+            comps.flags[i] |= SWS_COMP_SWAPPED;
+    }
+
     /* TODO: handle subsampled or semipacked input formats */
     RET(ff_sws_op_list_append(ops, &(SwsOp) {
         .op    = SWS_OP_READ,
@@ -921,7 +927,7 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum AVPixelFormat 
fmt)
         .comps = comps,
     }));
 
-    if ((desc->flags & AV_PIX_FMT_FLAG_BE) != NATIVE_ENDIAN_FLAG) {
+    if (swapped) {
         RET(ff_sws_op_list_append(ops, &(SwsOp) {
             .op   = SWS_OP_SWAP_BYTES,
             .type = raw_type,
diff --git a/libswscale/ops.c b/libswscale/ops.c
index 25bce3c59f..2bf4550444 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -261,11 +261,14 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
                 op->comps.max[i]   = prev.max[i];
             }
             break;
+        case SWS_OP_SWAP_BYTES:
+            for (int i = 0; i < 4; i++)
+                op->comps.flags[i] = prev.flags[i] ^ SWS_COMP_SWAPPED;
+            break;
         case SWS_OP_WRITE:
             for (int i = 0; i < op->rw.elems; i++)
                 av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE));
             /* fall through */
-        case SWS_OP_SWAP_BYTES:
         case SWS_OP_LSHIFT:
         case SWS_OP_RSHIFT:
         case SWS_OP_MIN:
@@ -607,6 +610,8 @@ static char describe_comp_flags(unsigned flags)
         return 'X';
     else if (flags & SWS_COMP_ZERO)
         return '0';
+    else if (flags & SWS_COMP_SWAPPED)
+        return 'z';
     else if (flags & SWS_COMP_EXACT)
         return '+';
     else
diff --git a/libswscale/ops.h b/libswscale/ops.h
index f49bbcc601..058fe9c77c 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -73,6 +73,7 @@ enum SwsCompFlags {
     SWS_COMP_GARBAGE = 1 << 0, /* contents are undefined / garbage data */
     SWS_COMP_EXACT   = 1 << 1, /* value is an exact integer */
     SWS_COMP_ZERO    = 1 << 2, /* known to be a constant zero */
+    SWS_COMP_SWAPPED = 1 << 3, /* byte order is swapped */
 };
 
 typedef union SwsConst {
diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list
index 1ec4e5af79..5b5797e962 100644
--- a/tests/ref/fate/sws-ops-list
+++ b/tests/ref/fate/sws-ops-list
@@ -1 +1 @@
-08b3e768ae2f518b73897ab5bad459e9
+7c45512e9589aad5843e6ad3e430267b
-- 
2.49.1


>From 3c19f5b093dda38bc17cdecc57ceb9bbfc41d2b4 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 14:58:42 +0100
Subject: [PATCH 07/10] swscale/ops: use switch/case for updating SwsComps
 ranges

A bit more readable and makes it clear what the special cases are.
---
 libswscale/ops.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 2bf4550444..b703956c5c 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -232,15 +232,17 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
     for (int n = 0; n < ops->num_ops; n++) {
         SwsOp *op = &ops->ops[n];
 
-        if (op->op != SWS_OP_READ) {
-            /* Prefill min/max values automatically; may have to be fixed in
-            * special cases */
+        switch (op->op) {
+        case SWS_OP_READ:
+        case SWS_OP_LINEAR:
+        case SWS_OP_SWAP_BYTES:
+            break; /* special cases, handled below */
+        default:
             memcpy(op->comps.min, prev.min, sizeof(prev.min));
             memcpy(op->comps.max, prev.max, sizeof(prev.max));
-            if (op->op != SWS_OP_SWAP_BYTES) {
-                ff_sws_apply_op_q(op, op->comps.min);
-                ff_sws_apply_op_q(op, op->comps.max);
-            }
+            ff_sws_apply_op_q(op, op->comps.min);
+            ff_sws_apply_op_q(op, op->comps.max);
+            break;
         }
 
         switch (op->op) {
@@ -262,8 +264,11 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
             }
             break;
         case SWS_OP_SWAP_BYTES:
-            for (int i = 0; i < 4; i++)
+            for (int i = 0; i < 4; i++) {
                 op->comps.flags[i] = prev.flags[i] ^ SWS_COMP_SWAPPED;
+                op->comps.min[i]   = prev.min[i];
+                op->comps.max[i]   = prev.max[i];
+            }
             break;
         case SWS_OP_WRITE:
             for (int i = 0; i < op->rw.elems; i++)
-- 
2.49.1


>From 8c5b178f14a61c386612de4cbfd7f6d63c3bc2ae Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 15:01:46 +0100
Subject: [PATCH 08/10] swscale/ops: explicitly reset value range after
 SWS_OP_UNPACK

The current logic implicitly pulled the new value range out of SWS_OP_ASSUME,
but this was quite ill-formed and not very robust. In particular, it only
worked because of the implicit assumption that the value range was always
set to 0b1111...111.

This actually poses a serious problem for 32-bit packed formats, whose
value range actually does not fit into AVRational. In the past, it only
worked because the value would implicitly overflow to -1, which SWS_OP_UNPACK
would then correctly extract the bits out from again.

In general, it's cleaner (and sufficient) to just explicitly reset the value
range on SWS_OP_UNPACK again.
---
 libswscale/ops.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index b703956c5c..4be43ed7c1 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -236,6 +236,7 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
         case SWS_OP_READ:
         case SWS_OP_LINEAR:
         case SWS_OP_SWAP_BYTES:
+        case SWS_OP_UNPACK:
             break; /* special cases, handled below */
         default:
             memcpy(op->comps.min, prev.min, sizeof(prev.min));
@@ -289,9 +290,13 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
             break;
         case SWS_OP_UNPACK:
             for (int i = 0; i < 4; i++) {
-                if (op->pack.pattern[i])
+                const int pattern = op->pack.pattern[i];
+                if (pattern) {
+                    av_assert1(pattern < 32);
                     op->comps.flags[i] = prev.flags[0];
-                else
+                    op->comps.min[i]   = Q(0);
+                    op->comps.max[i]   = Q((1L << pattern) - 1);
+                } else
                     op->comps.flags[i] = SWS_COMP_GARBAGE;
             }
             break;
-- 
2.49.1


>From 099a620f3d3845d1d2b01b297fa46fcb53cf20cd Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 22 Dec 2025 15:42:08 +0100
Subject: [PATCH 09/10] swscale/ops: don't overflow AVRational range

AVRational64 is not quite ready, so avoid UB here for now.
---
 libswscale/ops.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 4be43ed7c1..8a4a4a26c0 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -250,10 +250,12 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
         case SWS_OP_READ:
             for (int i = 0; i < op->rw.elems; i++) {
                 if (ff_sws_pixel_type_is_int(op->type)) {
-                    int bits = 8 * ff_sws_pixel_type_size(op->type) >> 
op->rw.frac;
+                    const int bits = 8 * ff_sws_pixel_type_size(op->type) >> 
op->rw.frac;
+                    const uint64_t range = (1ULL << bits) - 1;
                     op->comps.flags[i] |= SWS_COMP_EXACT;
                     op->comps.min[i] = av_max_q(Q(0), op->comps.min[i]);
-                    op->comps.max[i] = av_min_q(Q((1ULL << bits) - 1), 
op->comps.max[i]);
+                    if (range <= INT_MAX) /* FIXME: AVRational is limited to 
INT_MAX */
+                        op->comps.max[i] = av_min_q(Q(range), 
op->comps.max[i]);
                 }
             }
 
-- 
2.49.1


>From be834ed1159148bc241c27eac9ca45df9ecb3df4 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Mon, 15 Dec 2025 18:34:47 +0100
Subject: [PATCH 10/10] Revert "swscale/ops: clarify SwsOpList.src/dst
 semantics"

This reverts commit c94e8afe5d195fb08c441fbe3f8c2295081fcf8b.

These are now actually purely informational.
---
 libswscale/ops.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/ops.h b/libswscale/ops.h
index 058fe9c77c..9e5e55f5ac 100644
--- a/libswscale/ops.h
+++ b/libswscale/ops.h
@@ -232,8 +232,8 @@ typedef struct SwsOpList {
     SwsOp *ops;
     int num_ops;
 
-    /* Metadata associated with this operation list */
-    SwsFormat src, dst; /* if set; may inform the optimizer about e.g value 
ranges */
+    /* Purely informative metadata associated with this operation list */
+    SwsFormat src, dst;
 } SwsOpList;
 
 SwsOpList *ff_sws_op_list_alloc(void);
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to