ops: remove redundant linear combinations

Ramiro Polla via ffmpeg-cvslog Mon, 22 Jun 2026 04:58:44 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit a016f34d176bf9c541a6bb5b3e5f8b1dcaae786d
Author:     Ramiro Polla <[email protected]>
AuthorDate: Sat Jun 13 02:19:48 2026 +0200
Commit:     Ramiro Polla <[email protected]>
CommitDate: Mon Jun 22 13:56:31 2026 +0200

    swscale/aarch64/ops: remove redundant linear combinations
    
    There is no easy optimization that can be triggered by knowing that the
    offset is exactly 1. This led to identical functions being instantiated
    for different params.
    
    Also simplified the AVRational comparisons a bit.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Ramiro Polla <[email protected]>
---
 libswscale/aarch64/ops_entries.c   | 10 ++--------
 libswscale/aarch64/ops_impl_conv.c |  5 +++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index bf2d006261..7f60afb484 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -349,14 +349,12 @@
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, 
.mask = 0x0111 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, 
.mask = 0x0001 },
 { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, 
.mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
+{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
@@ -365,10 +363,6 @@
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 
1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
 { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 
0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 },
diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c
index b0a286edb6..4e401527cd 100644
--- a/libswscale/aarch64/ops_impl_conv.c
+++ b/libswscale/aarch64/ops_impl_conv.c
@@ -214,10 +214,11 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
                 continue;
             MASK_SET(out->mask, i, 1);
             for (int j = 0; j < 5; j++) {
+                const AVRational k = op->lin.m[i][j];
                 int jj = linear_index_from_sws_op(j);
-                if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1)))
+                if (j < 4 && k.num == k.den)
                     LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1);
-                else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1)))
+                else if (k.num != 0)
                     LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X);
             }
         }

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 03/04: swscale/aarch64/ops: remove redundant linear combinations

Reply via email to