ops: add support for optional dither indices

Niklas Haas via ffmpeg-cvslog Thu, 26 Feb 2026 05:10:23 -0800

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit ce096aa4ee9197701502d0c32e3c7f56c44d863e
Author:     Niklas Haas <[email protected]>
AuthorDate: Wed Feb 25 17:00:07 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Feb 26 13:09:14 2026 +0000

    swscale/x86/ops: add support for optional dither indices
    
    Instead of defining multiple patterns for the dither ops, just define a
    single generic function that branches internally. The branch is well-predicted
    and ridiculously cheap. At least on my end, within margin of error.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_chain.h       |  1 +
 libswscale/x86/ops.c         | 51 ++++++++++++++++++++++----------------------
 libswscale/x86/ops_float.asm |  8 +++++--
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index 56543c6b0b..532622fd2c 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -47,6 +47,7 @@ typedef union SwsOpPriv {
     int8_t    i8[16];
     uint8_t   u8[16];
     uint16_t u16[8];
+    int16_t  i16[8];
     uint32_t u32[4];
     float    f32[4];
 } SwsOpPriv;
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 0db6837c3a..44bdf159c4 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -201,10 +201,11 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
     }
 
     const int size = 1 << op->dither.size_log2;
+    const int8_t *off = op->dither.y_offset;
     int max_offset = 0;
     for (int i = 0; i < 4; i++) {
-        const int offset = op->dither.y_offset[i] & (size - 1);
-        max_offset = FFMAX(max_offset, offset);
+        if (off[i] >= 0)
+            max_offset = FFMAX(max_offset, off[i] & (size - 1));
     }
 
     /* Allocate extra rows to allow over-reading for row offsets. Note that
@@ -223,17 +224,17 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
     memcpy(&matrix[size * size], matrix, max_offset * stride);
 
     /* Store relative pointer offset to each row inside extra space */
-    static_assert(sizeof(out->ptr) <= sizeof(uint16_t[4]), ">8 byte pointers not supported");
-    assert(max_offset * stride <= UINT16_MAX);
-    uint16_t *offset = &out->u16[4];
+    static_assert(sizeof(out->ptr) <= sizeof(int16_t[4]), ">8 byte pointers not supported");
+    assert(max_offset * stride <= INT16_MAX);
+    int16_t *off_out = &out->i16[4];
     for (int i = 0; i < 4; i++)
-        offset[i] = (op->dither.y_offset[i] & (size - 1)) * stride;
+        off_out[i] = off[i] >= 0 ? (off[i] & (size - 1)) * stride : -1;
 
     return 0;
 }
 
-#define DECL_DITHER(EXT, SIZE)                                                 
 \
-    DECL_COMMON_PATTERNS(F32, dither##SIZE##EXT,                               
 \
+#define DECL_DITHER(DECL_MACRO, EXT, SIZE)                                     
 \
+    DECL_MACRO(F32, dither##SIZE##EXT,                                         
 \
         .op    = SWS_OP_DITHER,                                                
 \
         .setup = setup_dither,                                                 
 \
         .free  = (SIZE) ? av_free : NULL,                                      
 \
@@ -453,15 +454,15 @@ static const SwsOpTable ops16##EXT = {
     DECL_EXPAND(EXT,   U8, U32)                                                
 \
     DECL_MIN_MAX(EXT)                                                          
 \
     DECL_SCALE(EXT)                                                            
 \
-    DECL_DITHER(EXT, 0)                                                        
 \
-    DECL_DITHER(EXT, 1)                                                        
 \
-    DECL_DITHER(EXT, 2)                                                        
 \
-    DECL_DITHER(EXT, 3)                                                        
 \
-    DECL_DITHER(EXT, 4)                                                        
 \
-    DECL_DITHER(EXT, 5)                                                        
 \
-    DECL_DITHER(EXT, 6)                                                        
 \
-    DECL_DITHER(EXT, 7)                                                        
 \
-    DECL_DITHER(EXT, 8)                                                        
 \
+    DECL_DITHER(DECL_COMMON_PATTERNS, EXT, 0)                                  
 \
+    DECL_DITHER(DECL_ASM, EXT, 1)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 2)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 3)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 4)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 5)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 6)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 7)                                              
 \
+    DECL_DITHER(DECL_ASM, EXT, 8)                                              
 \
     DECL_LINEAR(EXT, luma,      SWS_MASK_LUMA)                                 
 \
     DECL_LINEAR(EXT, alpha,     SWS_MASK_ALPHA)                                
 \
     DECL_LINEAR(EXT, lumalpha,  SWS_MASK_LUMA | SWS_MASK_ALPHA)                
 \
@@ -505,14 +506,14 @@ static const SwsOpTable ops32##EXT = {
         REF_COMMON_PATTERNS(max##EXT),                                         
 \
         REF_COMMON_PATTERNS(scale##EXT),                                       
 \
         REF_COMMON_PATTERNS(dither0##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither1##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither2##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither3##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither4##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither5##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither6##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither7##EXT),                                     
 \
-        REF_COMMON_PATTERNS(dither8##EXT),                                     
 \
+        &op_dither1##EXT,                                                      
 \
+        &op_dither2##EXT,                                                      
 \
+        &op_dither3##EXT,                                                      
 \
+        &op_dither4##EXT,                                                      
 \
+        &op_dither5##EXT,                                                      
 \
+        &op_dither6##EXT,                                                      
 \
+        &op_dither7##EXT,                                                      
 \
+        &op_dither8##EXT,                                                      
 \
         &op_luma##EXT,                                                         
 \
         &op_alpha##EXT,                                                        
 \
         &op_lumalpha##EXT,                                                     
 \
diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm
index 78f35a9785..c9dc408a9b 100644
--- a/libswscale/x86/ops_float.asm
+++ b/libswscale/x86/ops_float.asm
@@ -197,6 +197,9 @@ IF W,   addps mw2, m8
 
 %macro dither_row 5 ; size_log2, comp_idx, matrix, out, out2
         mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i]
+        ; test if tmp0w < 0
+        test tmp0w, tmp0w
+        js .skip%2
 %if %1 == 1
         vbroadcastsd m8, [%3 + tmp0q]
         addps %4, m8
@@ -209,6 +212,7 @@ IF W,   addps mw2, m8
         addps %4, [%3 + tmp0q]
         addps %5, [%3 + tmp0q + mmsize * ((4 << %1) > mmsize)]
 %endif
+.skip%2:
 %endmacro
 
 %macro dither 1 ; size_log2
@@ -238,7 +242,7 @@ op dither%1
 %endmacro
 
 %macro dither_fns 0
-        dither0
+        decl_common_patterns dither0
         dither 1
         dither 2
         dither 3
@@ -364,5 +368,5 @@ decl_common_patterns conv32fto8
 decl_common_patterns conv32fto16
 decl_common_patterns min_max
 decl_common_patterns scale
-decl_common_patterns dither_fns
+dither_fns
 linear_fns

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 05/06: swscale/x86/ops: add support for optional dither indices

Reply via email to