This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 8b346363aa10e2c5de31c4c87abec3f28fd709bc
Author:     Niklas Haas <[email protected]>
AuthorDate: Wed Feb 25 16:52:09 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Feb 26 13:09:14 2026 +0000

    swscale/ops_backend: implement support for optional dither indices
    
    If you place the branch inside the loop, gcc at least reverts back to scalar
    code, so better to just split up and guard the entire loop.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_chain.h      |  1 +
 libswscale/ops_tmpl_float.c | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index 0bc8c01283..56543c6b0b 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -44,6 +44,7 @@ typedef union SwsOpPriv {
 
     /* Common types */
     void *ptr;
+    int8_t    i8[16];
     uint8_t   u8[16];
     uint16_t u16[8];
     uint32_t u32[4];
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c
index 10749d5f7d..2f1d249168 100644
--- a/libswscale/ops_tmpl_float.c
+++ b/libswscale/ops_tmpl_float.c
@@ -57,7 +57,7 @@ DECL_SETUP(setup_dither)
         return AVERROR(ENOMEM);
 
     static_assert(sizeof(out->ptr) <= sizeof(uint8_t[8]), ">8 byte pointers not supported");
-    uint8_t *offset = &out->u8[8];
+    int8_t *offset = &out->i8[8];
     for (int i = 0; i < 4; i++)
         offset[i] = op->dither.y_offset[i];
 
@@ -74,25 +74,26 @@ DECL_SETUP(setup_dither)
 DECL_FUNC(dither, const int size_log2)
 {
     const pixel_t *restrict matrix = impl->priv.ptr;
-    const uint8_t *offset = &impl->priv.u8[8];
+    const int8_t *restrict offset = &impl->priv.i8[8];
     const int mask = (1 << size_log2) - 1;
     const int y_line = iter->y;
-    const int row0 = (y_line + offset[0]) & mask;
-    const int row1 = (y_line + offset[1]) & mask;
-    const int row2 = (y_line + offset[2]) & mask;
-    const int row3 = (y_line + offset[3]) & mask;
     const int size = 1 << size_log2;
     const int width = FFMAX(size, SWS_BLOCK_SIZE);
     const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1);
 
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] += size_log2 ? matrix[row0 * width + base + i] : (pixel_t) 0.5;
-        y[i] += size_log2 ? matrix[row1 * width + base + i] : (pixel_t) 0.5;
-        z[i] += size_log2 ? matrix[row2 * width + base + i] : (pixel_t) 0.5;
-        w[i] += size_log2 ? matrix[row3 * width + base + i] : (pixel_t) 0.5;
+#define DITHER_COMP(VAR, IDX)                                                            \
+    if (offset[IDX] >= 0) {                                                              \
+        const int row = (y_line + offset[IDX]) & mask;                                   \
+        SWS_LOOP                                                                         \
+        for (int i = 0; i < SWS_BLOCK_SIZE; i++)                                         \
+            VAR[i] += size_log2 ? matrix[row * width + base + i] : (pixel_t) 0.5;        \
     }
 
+    DITHER_COMP(x, 0)
+    DITHER_COMP(y, 1)
+    DITHER_COMP(z, 2)
+    DITHER_COMP(w, 3)
+
     CONTINUE(block_t, x, y, z, w);
 }
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to