This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit a016f34d176bf9c541a6bb5b3e5f8b1dcaae786d Author: Ramiro Polla <[email protected]> AuthorDate: Sat Jun 13 02:19:48 2026 +0200 Commit: Ramiro Polla <[email protected]> CommitDate: Mon Jun 22 13:56:31 2026 +0200 swscale/aarch64/ops: remove redundant linear combinations There is no easy optimization that can be triggered by knowing that the offset is exactly 1. This led to identical functions being instantiated for different params. Also simplified the AVRational comparisons a bit. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops_entries.c | 10 ++-------- libswscale/aarch64/ops_impl_conv.c | 5 +++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index bf2d006261..7f60afb484 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -349,14 +349,12 @@ { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, @@ -365,10 +363,6 @@ { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c index b0a286edb6..4e401527cd 100644 --- a/libswscale/aarch64/ops_impl_conv.c +++ b/libswscale/aarch64/ops_impl_conv.c @@ -214,10 +214,11 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, continue; MASK_SET(out->mask, i, 1); for (int j = 0; j < 5; j++) { + const AVRational k = op->lin.m[i][j]; int jj = linear_index_from_sws_op(j); - if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1))) + if (j < 4 && k.num == k.den) LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1); - else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1))) + else if (k.num != 0) LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X); } } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
