x86: remove broken and unnecessary 1x1 dither fast path

Niklas Haas via ffmpeg-cvslog Tue, 09 Jun 2026 10:04:59 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit ca8774b9d61bef432a75f77e5b9461f18800d83c
Author:     Niklas Haas <[email protected]>
AuthorDate: Sun Apr 19 13:46:15 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Tue Jun 9 18:27:20 2026 +0200

    swscale/x86: remove broken and unnecessary 1x1 dither fast path
    
    This is broken because it fails to check dither.y_offset[] to determine if
    dithering for a channel is requested or not.
    
    This is unnecessary because the generic dither code already jumps over unused
    components, which is cheap enough not to worry about this special case for
    now.
    
    This code will, in any case, soon be replaced by a uops_macros.h-derived
    approach. This commit is only needed as a stopgap to make checkasm continue
    working after the sws_uops refactor.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/x86/ops.c         | 27 +++++++++++----------------
 libswscale/x86/ops_float.asm | 28 ++++++++++------------------
 2 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 1191a2fc35..2087176cee 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -209,14 +209,8 @@ static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
 static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
 {
     const SwsOp *op = params->op;
-    /* 1x1 matrix / single constant */
-    if (!op->dither.size_log2) {
-        const AVRational k = op->dither.matrix[0];
-        out->priv.f32[0] = (float) k.num / k.den;
-        return 0;
-    }
-
     const int size = 1 << op->dither.size_log2;
+    const int stride = size * sizeof(float);
     const int8_t *off = op->dither.y_offset;
     int max_offset = 0;
     for (int i = 0; i < 4; i++) {
@@ -224,11 +218,17 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
             max_offset = FFMAX(max_offset, off[i] & (size - 1));
     }
 
+    /* 1x1 matrix / single constant */
+    if (!op->dither.size_log2) {
+        const AVRational k = op->dither.matrix[0];
+        out->priv.f32[0] = (float) k.num / k.den;
+        goto store_offsets;
+    }
+
     /* Allocate extra rows to allow over-reading for row offsets. Note that
      * max_offset is currently never larger than 5, so the extra space needed
      * for this over-allocation is bounded by 5 * size * sizeof(float),
      * typically 320 bytes for a 16x16 dither matrix. */
-    const int stride = size * sizeof(float);
     const int num_rows = size + max_offset;
     float *matrix = out->priv.ptr = av_mallocz(num_rows * stride);
     if (!matrix)
@@ -240,6 +240,7 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
 
     memcpy(&matrix[size * size], matrix, max_offset * stride);
 
+store_offsets:
     /* Store relative pointer offset to each row inside extra space */
     static_assert(sizeof(out->priv.ptr) <= sizeof(int16_t[4]),
                   ">8 byte pointers not supported");
@@ -251,12 +252,6 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
     return 0;
 }
 
-#define DECL_DITHER0(EXT)                                                      
 \
-    DECL_COMMON_PATTERNS(F32, dither0##EXT,                                    
 \
-        .op    = SWS_OP_DITHER,                                                
 \
-        .setup = setup_dither,                                                 
 \
-    );
-
 #define DECL_DITHER(EXT, SIZE)                                                 
 \
     DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT,                             
 \
         .op    = SWS_OP_DITHER,                                                
 \
@@ -729,7 +724,7 @@ static const SwsOpTable ops16##EXT = {
     DECL_EXPAND(EXT,   U8, U32)                                                
 \
     DECL_MIN_MAX(EXT)                                                          
 \
     DECL_SCALE(EXT)                                                            
 \
-    DECL_DITHER0(EXT)                                                          
 \
+    DECL_DITHER(EXT, 0)                                                        
 \
     DECL_DITHER(EXT, 1)                                                        
 \
     DECL_DITHER(EXT, 2)                                                        
 \
     DECL_DITHER(EXT, 3)                                                        
 \
@@ -790,7 +785,7 @@ static const SwsOpTable ops32##EXT = {
         REF_COMMON_PATTERNS(min##EXT),                                         
 \
         REF_COMMON_PATTERNS(max##EXT),                                         
 \
         REF_COMMON_PATTERNS(scale##EXT),                                       
 \
-        REF_COMMON_PATTERNS(dither0##EXT),                                     
 \
+        &op_dither0##EXT,                                                      
 \
         &op_dither1##EXT,                                                      
 \
         &op_dither2##EXT,                                                      
 \
         &op_dither3##EXT,                                                      
 \
diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm
index 845f0081db..605a9aa005 100644
--- a/libswscale/x86/ops_float.asm
+++ b/libswscale/x86/ops_float.asm
@@ -209,28 +209,15 @@ IF W,   mulps mw2, m8
 ;---------------------------------------------------------
 ; Dithering
 
-%macro dither0 0
-op dither0
-        ; constant offset for all channels
-        vbroadcastss m8, [implq + SwsOpImpl.priv]
-        LOAD_CONT tmp0q
-IF X,   addps mx, m8
-IF Y,   addps my, m8
-IF Z,   addps mz, m8
-IF W,   addps mw, m8
-IF X,   addps mx2, m8
-IF Y,   addps my2, m8
-IF Z,   addps mz2, m8
-IF W,   addps mw2, m8
-        CONTINUE tmp0q
-%endmacro
-
 %macro dither_row 5 ; size_log2, comp_idx, matrix, out, out2
         mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i]
         ; test is tmp0w < 0
         test tmp0w, tmp0w
         js .skip%2
-%if %1 == 1
+%if %1 == 0
+        addps %4, m8
+        addps %5, m8
+%elif %1 == 1
         vbroadcastsd m8, [%3 + tmp0q]
         addps %4, m8
         addps %5, m8
@@ -247,6 +234,10 @@ IF W,   addps mw2, m8
 
 %macro dither 1 ; size_log2
 op dither%1
+%if %1 == 0
+        ; dither offset is constant
+        vbroadcastss m8, [implq + SwsOpImpl.priv]
+%else
         ; dither matrix is stored indirectly at the private data address
         mov tmp1q, [implq + SwsOpImpl.priv]
         ; add y offset. note that for 2x2, we would only need to look at the
@@ -264,6 +255,7 @@ op dither%1
         and tmp0d, (4 << %1) - 1
         add tmp1q, tmp0q
     %endif
+%endif
         dither_row %1, 0, tmp1q, mx, mx2
         dither_row %1, 1, tmp1q, my, my2
         dither_row %1, 2, tmp1q, mz, mz2
@@ -272,7 +264,7 @@ op dither%1
 %endmacro
 
 %macro dither_fns 0
-        decl_common_patterns dither0
+        dither 0
         dither 1
         dither 2
         dither 3

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 15/34: swscale/x86: remove broken and unnecessary 1x1 dither fast path

Reply via email to