This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit addee699551473d5adfc1d9180cc735ddd15e530
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Jun 2 14:30:17 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Jun 11 16:27:47 2026 +0000

    swscale/ops_dispatch: generalize block_size_in/out to array
    
    See previous commit for justification. I decided to split these
    refactors up into several independent commits to make it easier
    to review and bisect, since they are all independent atomic changes.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_dispatch.c      | 29 ++++++++++++++++-------------
 libswscale/ops_dispatch.h      |  6 +++---
 libswscale/ops_memcpy.c        |  2 +-
 libswscale/x86/ops_include.asm |  4 ++--
 tests/checkasm/sw_ops.c        |  8 ++++----
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index fcf77204db..069474c056 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -242,7 +242,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
                                                 safe_bytes - filter_size,
                                                 exec->in_offset_x);
         } else {
-            safe_blocks_in = safe_bytes / exec->block_size_in;
+            safe_blocks_in = safe_bytes / exec->block_size_in[i];
         }
 
         if (safe_blocks_in < num_blocks) {
@@ -251,7 +251,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
             safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
         }
 
-        size_t loop_size   = num_blocks * exec->block_size_in;
+        size_t loop_size   = num_blocks * exec->block_size_in[i];
         exec->in[i]        = in->data[idx];
         exec->in_stride[i] = in->linesize[idx];
         exec->in_bump[i]   = in->linesize[idx] - loop_size;
@@ -265,13 +265,13 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
         int sub_x  = chroma ? outdesc->log2_chroma_w : 0;
         int sub_y  = chroma ? outdesc->log2_chroma_h : 0;
         size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write[i]);
-        size_t safe_blocks_out = safe_bytes / exec->block_size_out;
+        size_t safe_blocks_out = safe_bytes / exec->block_size_out[i];
         if (safe_blocks_out < num_blocks) {
             p->memcpy_out = true;
             safe_blocks   = FFMIN(safe_blocks, safe_blocks_out);
         }
 
-        size_t loop_size    = num_blocks * exec->block_size_out;
+        size_t loop_size    = num_blocks * exec->block_size_out[i];
         exec->out[i]        = out->data[idx];
         exec->out_stride[i] = out->linesize[idx];
         exec->out_bump[i]   = out->linesize[idx] - loop_size;
@@ -317,7 +317,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
         } else {
             needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
         }
-        size_t loop_size   = p->tail_blocks * exec->block_size_in;
+        size_t loop_size   = p->tail_blocks * exec->block_size_in[i];
         tail->in_stride[i] = FFALIGN(needed_size + comp->over_read[i], align);
         tail->in_bump[i]   = tail->in_stride[i] - loop_size;
         alloc_size += tail->in_stride[i] * in->height;
@@ -325,7 +325,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
 
     for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
         size_t needed_size  = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
-        size_t loop_size    = p->tail_blocks * exec->block_size_out;
+        size_t loop_size    = p->tail_blocks * exec->block_size_out[i];
         tail->out_stride[i] = FFALIGN(needed_size + comp->over_write[i], align);
         tail->out_bump[i]   = tail->out_stride[i] - loop_size;
         alloc_size += tail->out_stride[i] * out->height;
@@ -419,8 +419,8 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
             /* We process fewer blocks, so the in_bump needs to be increased
              * to reflect that the plane pointers are left on the last block,
              * not the end of the processed line, after each loop iteration */
-            exec.in_bump[i]  += exec.block_size_in  * tail_blocks;
-            exec.out_bump[i] += exec.block_size_out * tail_blocks;
+            exec.in_bump[i]  += exec.block_size_in[i]  * tail_blocks;
+            exec.out_bump[i] += exec.block_size_out[i] * tail_blocks;
         }
 
         comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
@@ -448,7 +448,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
                        exec.in[i], exec.in_stride[i], lines, p->tail_size_in);
         } else {
             /* Reuse input pointers directly */
-            const size_t loop_size = tail_blocks * exec.block_size_in;
+            const size_t loop_size = tail_blocks * exec.block_size_in[i];
             tail.in[i]        = exec.in[i];
             tail.in_stride[i] = exec.in_stride[i];
             tail.in_bump[i]   = exec.in_stride[i] - loop_size;
@@ -457,7 +457,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
 
     for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
         /* Reuse output pointers directly */
-        const size_t loop_size = tail_blocks * exec.block_size_out;
+        const size_t loop_size = tail_blocks * exec.block_size_out[i];
         tail.out[i]        = exec.out[i];
         tail.out_stride[i] = exec.out_stride[i];
         tail.out_bump[i]   = exec.out_stride[i] - loop_size;
@@ -552,8 +552,10 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend,
         goto fail;
     }
 
-    p->exec_base.block_size_in  = block_bits_in  >> 3;
-    p->exec_base.block_size_out = block_bits_out >> 3;
+    for (int i = 0; i < 4; i++) {
+        p->exec_base.block_size_in[i]  = block_bits_in  >> 3;
+        p->exec_base.block_size_out[i] = block_bits_out >> 3;
+    }
 
     for (int i = 0; i < 4; i++) {
         p->idx_in[i]  = i < p->planes_in  ? ops->plane_src[i] : -1;
@@ -602,7 +604,8 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend,
         }
         for (int x = filter->dst_size; x < pixels; x++)
             offset[x] = offset[filter->dst_size - 1];
-        p->exec_base.block_size_in = 0; /* ptr does not advance */
+        for (int i = 0; i < 4; i++)
+            p->exec_base.block_size_in[i] = 0; /* ptr does not advance */
         p->filter_size_h = filter->filter_size;
     }
 
diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h
index 237a036f69..4097b8e947 100644
--- a/libswscale/ops_dispatch.h
+++ b/libswscale/ops_dispatch.h
@@ -54,8 +54,8 @@ typedef struct SwsOpExec {
     /* Extra metadata, may or may not be useful */
     int32_t width, height;      /* Overall output image dimensions */
     int32_t slice_y, slice_h;   /* Start and height of current slice */
-    int32_t block_size_in;      /* Size of a block of pixels in bytes */
-    int32_t block_size_out;
+    int32_t block_size_in[4];   /* Size of a block of pixels in bytes */
+    int32_t block_size_out[4];
 
     /* Subsampling factors for each plane */
     uint8_t in_sub_y[4], out_sub_y[4];
@@ -81,7 +81,7 @@ typedef struct SwsOpExec {
 } SwsOpExec;
 
 static_assert(sizeof(SwsOpExec) == 24 * sizeof(void *) +
-                                   6  * sizeof(int32_t) +
+                                   12 * sizeof(int32_t) +
                                    16 * sizeof(uint8_t) +
                                    2  * sizeof(void *),
               "SwsOpExec layout mismatch");
diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c
index 00f3e79608..26634049dd 100644
--- a/libswscale/ops_memcpy.c
+++ b/libswscale/ops_memcpy.c
@@ -43,12 +43,12 @@ static void process(const SwsOpExec *exec, const void *priv,
 {
     const MemcpyPriv *p = priv;
     const int lines = y_end - y_start;
-    const int bytes = x_end * exec->block_size_out;
     av_assert1(x_start == 0 && x_end == exec->width);
 
     for (int i = 0; i < p->num_planes; i++) {
         uint8_t *out = exec->out[i];
         const int idx = p->index[i];
+        const int bytes = x_end * exec->block_size_out[i];
         const int use_loop = exec->out_stride[i] > bytes + SWS_MAX_PADDING;
         if (idx < 0 && !use_loop) {
             memset(out, p->clear_value[i], exec->out_stride[i] * lines);
diff --git a/libswscale/x86/ops_include.asm b/libswscale/x86/ops_include.asm
index cc44a247b0..073ed31e57 100644
--- a/libswscale/x86/ops_include.asm
+++ b/libswscale/x86/ops_include.asm
@@ -123,8 +123,8 @@ struc SwsOpExec
     .height resd 1
     .slice_y resd 1
     .slice_h resd 1
-    .block_size_in resd 1
-    .block_size_out resd 1
+    .block_size_in resd 4
+    .block_size_out resd 4
     .in_sub_y4 resb 4
     .out_sub_y4 resb 4
     .in_sub_x4 resb 4
diff --git a/tests/checkasm/sw_ops.c b/tests/checkasm/sw_ops.c
index e09635abd4..fdc17bffa8 100644
--- a/tests/checkasm/sw_ops.c
+++ b/tests/checkasm/sw_ops.c
@@ -190,19 +190,19 @@ static void check_compiled(const char *name,
         exec.in_offset_x = in_offset_x;
     }
 
-    exec.block_size_in  = comp_ref->block_size * rw_pixel_bits(read_op)  >> 3;
-    exec.block_size_out = comp_ref->block_size * rw_pixel_bits(write_op) >> 3;
     for (int i = 0; i < NB_PLANES; i++) {
         exec.in[i]  = (void *) src0[i];
         exec.out[i] = (void *) dst0[i];
+        exec.block_size_in[i]  = comp_ref->block_size * rw_pixel_bits(read_op)  >> 3;
+        exec.block_size_out[i] = comp_ref->block_size * rw_pixel_bits(write_op) >> 3;
     }
     checkasm_call(comp_ref->func, &exec, comp_ref->priv, 0, 0, PIXELS / comp_ref->block_size, LINES);
 
-    exec.block_size_in  = comp_new->block_size * rw_pixel_bits(read_op)  >> 3;
-    exec.block_size_out = comp_new->block_size * rw_pixel_bits(write_op) >> 3;
     for (int i = 0; i < NB_PLANES; i++) {
         exec.in[i]  = (void *) src1[i];
         exec.out[i] = (void *) dst1[i];
+        exec.block_size_in[i]  = comp_new->block_size * rw_pixel_bits(read_op)  >> 3;
+        exec.block_size_out[i] = comp_new->block_size * rw_pixel_bits(write_op) >> 3;
     }
     checkasm_call_checked(comp_new->func, &exec, comp_new->priv, 0, 0, PIXELS / comp_new->block_size, LINES);
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to