ops_float: remove special case for 2x2 matrix

Niklas Haas via ffmpeg-cvslog Mon, 15 Dec 2025 06:32:50 -0800

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit b1c96b99faa145495415272875943f4e1047e4aa
Author:     Niklas Haas <[email protected]>
AuthorDate: Wed Dec 3 18:19:04 2025 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Mon Dec 15 14:31:58 2025 +0000

    swscale/x86/ops_float: remove special case for 2x2 matrix
    
    This is an exceptionally unlikely (in fact, currently impossible) case to
    actually hit, and not worth micro-optimizing for. More specifically, having
    this special case prevents me from easily adding per-row offsets.
---
 libswscale/x86/ops.c         | 20 +++++++++++---------
 libswscale/x86/ops_float.asm | 25 ++++++++-----------------
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 97bee93f5b..ecbd79564e 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -184,18 +184,20 @@ static int setup_shift(const SwsOp *op, SwsOpPriv *out)
         .setup = ff_sws_setup_q,                                               \
     );
 
-/* 2x2 matrix fits inside SwsOpPriv directly; save an indirect in this case */
-static_assert(sizeof(SwsOpPriv) >= sizeof(float[2][2]), "2x2 dither matrix too large");
 static int setup_dither(const SwsOp *op, SwsOpPriv *out)
 {
-    const int size = 1 << op->dither.size_log2;
-    float *matrix = out->f32;
-    if (size > 2) {
-        matrix = out->ptr = av_mallocz(size * size * sizeof(*matrix));
-        if (!matrix)
-            return AVERROR(ENOMEM);
+    /* 1x1 matrix / single constant */
+    if (!op->dither.size_log2) {
+        const AVRational k = op->dither.matrix[0];
+        out->f32[0] = (float) k.num / k.den;
+        return 0;
     }
 
+    const int size = 1 << op->dither.size_log2;
+    float *matrix = out->ptr = av_mallocz(size * size * sizeof(*matrix));
+    if (!matrix)
+        return AVERROR(ENOMEM);
+
     for (int i = 0; i < size * size; i++)
         matrix[i] = (float) op->dither.matrix[i].num / op->dither.matrix[i].den;
 
@@ -206,7 +208,7 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
     DECL_COMMON_PATTERNS(F32, dither##SIZE##EXT,                               \
         .op    = SWS_OP_DITHER,                                                \
         .setup = setup_dither,                                                 \
-        .free  = (1 << SIZE) > 2 ? av_free : NULL,                             \
+        .free  = (SIZE) ? av_free : NULL,                                      \
         .dither_size = SIZE,                                                   \
     );
 
diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm
index 6bdbd1b74e..ef08212fd6 100644
--- a/libswscale/x86/ops_float.asm
+++ b/libswscale/x86/ops_float.asm
@@ -183,7 +183,9 @@ IF W,   mulps mw2, m8
         lea tmp0q, %2
         and tmp0q, (1 << %1) - 1
         shl tmp0q, %1+2
-%if %1 == 2
+%if %1 == 1
+        vbroadcastsd   %4, [%3 + tmp0q]
+%elif %1 == 2
         VBROADCASTI128 %4, [%3 + tmp0q]
 %else
         mova %4, [%3 + tmp0q]
@@ -209,23 +211,12 @@ op dither%1
         %define DY DX
         %define DZ DX
         %define DW DX
-%elif %1 == 1
-        ; 2x2 matrix, only sign of y matters
-        mov tmp0d, yd
-        and tmp0d, 1
-        shl tmp0d, 3
-    %if X || Z
-        ; dither matrix is stored directly in the private data
-        vbroadcastsd DX, [implq + SwsOpImpl.priv + tmp0q]
-    %endif
-    %if Y || W
-        xor tmp0d, 8
-        vbroadcastsd DY, [implq + SwsOpImpl.priv + tmp0q]
-    %endif
-        %define DZ DX
-        %define DW DY
 %else
-        ; matrix is at least 4x4, load all four channels with custom offset
+        ; load all four channels with custom offset
+        ;
+        ; note that for 2x2, we would only need to look at the parity of `y`,
+        ; but this special case is ignored for simplicity reasons (and because
+        ; the current upstream format code never generates matrices that small)
     %if (4 << %1) > mmsize
         %define DX2 m12
         %define DY2 m13

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 04/11: swscale/x86/ops_float: remove special case for 2x2 matrix

Reply via email to