PR #23530 opened by Ramiro Polla (ramiro) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23530 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23530.patch
Introduces more specific swizzle operations, and removes some duplicates for read/write/clear/linear. From 6bd6059aecbae45670d0ebfee44a21423a719ed6 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Tue, 16 Jun 2026 13:56:20 +0200 Subject: [PATCH 1/4] swscale/aarch64/ops: remove redundant single-component packed read/write These functions are essentially the same as single-component planar read/write, and are actually never instantiated. This was left over from the initial implementation. --- libswscale/aarch64/ops_asmgen.c | 56 +++++---------------------------- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c index c03e0832ee..53501f52ea 100644 --- a/libswscale/aarch64/ops_asmgen.c +++ b/libswscale/aarch64/ops_asmgen.c @@ -449,23 +449,6 @@ static void asmgen_op_read_nibble(SwsAArch64Context *s, const SwsAArch64OpImplPa } } -static void asmgen_op_read_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - RasmContext *r = s->rctx; - AArch64VecViews vl[1]; - AArch64VecViews vh[1]; - - a64op_vec_views(s->vl[0], &vl[0]); - a64op_vec_views(s->vh[0], &vh[0]); - - switch ((s->use_vh ? 
0x100 : 0) | s->vec_size) { - case 0x008: i_ldr(r, vl[0].d, a64op_post(s->in[0], s->vec_size * 1)); break; - case 0x010: i_ldr(r, vl[0].q, a64op_post(s->in[0], s->vec_size * 1)); break; - case 0x108: i_ldp(r, vl[0].d, vh[0].d, a64op_post(s->in[0], s->vec_size * 2)); break; - case 0x110: i_ldp(r, vl[0].q, vh[0].q, a64op_post(s->in[0], s->vec_size * 2)); break; - } -} - static void asmgen_op_read_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx) { RasmContext *r = s->rctx; @@ -479,13 +462,10 @@ static void asmgen_op_read_packed_n(SwsAArch64Context *s, const SwsAArch64OpImpl static void asmgen_op_read_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) { - if (p->mask == 0x0001) { - asmgen_op_read_packed_1(s, p); - } else { - asmgen_op_read_packed_n(s, p, s->vl); - if (s->use_vh) - asmgen_op_read_packed_n(s, p, s->vh); - } + av_assert0(p->mask != 0x0001); + asmgen_op_read_packed_n(s, p, s->vl); + if (s->use_vh) + asmgen_op_read_packed_n(s, p, s->vh); } static void asmgen_op_read_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) @@ -574,23 +554,6 @@ static void asmgen_op_write_nibble(SwsAArch64Context *s, const SwsAArch64OpImplP } } -static void asmgen_op_write_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - RasmContext *r = s->rctx; - AArch64VecViews vl[1]; - AArch64VecViews vh[1]; - - a64op_vec_views(s->vl[0], &vl[0]); - a64op_vec_views(s->vh[0], &vh[0]); - - switch ((s->use_vh ? 
0x100 : 0) | s->vec_size) { - case 0x008: i_str(r, vl[0].d, a64op_post(s->out[0], s->vec_size * 1)); break; - case 0x010: i_str(r, vl[0].q, a64op_post(s->out[0], s->vec_size * 1)); break; - case 0x108: i_stp(r, vl[0].d, vh[0].d, a64op_post(s->out[0], s->vec_size * 2)); break; - case 0x110: i_stp(r, vl[0].q, vh[0].q, a64op_post(s->out[0], s->vec_size * 2)); break; - } -} - static void asmgen_op_write_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx) { RasmContext *r = s->rctx; @@ -604,13 +567,10 @@ static void asmgen_op_write_packed_n(SwsAArch64Context *s, const SwsAArch64OpImp static void asmgen_op_write_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) { - if (p->mask == 0x0001) { - asmgen_op_write_packed_1(s, p); - } else { - asmgen_op_write_packed_n(s, p, s->vl); - if (s->use_vh) - asmgen_op_write_packed_n(s, p, s->vh); - } + av_assert0(p->mask != 0x0001); + asmgen_op_write_packed_n(s, p, s->vl); + if (s->use_vh) + asmgen_op_write_packed_n(s, p, s->vh); } static void asmgen_op_write_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -- 2.52.0 From 462be68c6eebb1ffba087585aa20cf21887e1010 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Tue, 16 Jun 2026 14:17:16 +0200 Subject: [PATCH 2/4] swscale/aarch64/ops: fix mask for swizzle ops The mask for swizzle ops assumed that checking whether a component was assigned to itself was enough to determine whether the swizzle was needed for that component, but that wasn't correct. We should also take into account whether the component is needed by the next operation. Additionally, prevent duplicate functions from being generated by resetting the swizzle index to 0xf for components that are not needed or are left unchanged. 
--- libswscale/aarch64/ops_entries.c | 123 ++++++++++++++++++----------- libswscale/aarch64/ops_impl_conv.c | 17 ++-- 2 files changed, 85 insertions(+), 55 deletions(-) diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 4b8e4bbae1..3cd59f9c3d 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -95,63 +95,92 @@ { .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x000f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 
0x0132, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x012f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x03f2, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0ff1, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 
}, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0fff, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x100f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1203, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 
32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x102f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x102f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x132f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x132f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f0f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1f3f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1fff, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20f3, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20f3, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20ff, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x20ff, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = 
AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 8, .type 
= AARCH64_PIXEL_U8, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x210f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf00f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf021, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f2, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0f3, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0ff, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf0ff, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0100 }, +{ .op = 
AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf102, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf123, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf12f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf132, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf321, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf3f2, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xf3f2, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 8, .type 
= AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff01, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff03, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff03, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff0f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff31, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xff3f, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff1, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff2, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0xfff3, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_UNPACK, 
.pack = 0x0121, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c index 479afbb3ab..b0a286edb6 100644 --- a/libswscale/aarch64/ops_impl_conv.c +++ b/libswscale/aarch64/ops_impl_conv.c @@ -155,15 +155,16 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, out->type = AARCH64_PIXEL_U32; break; case AARCH64_SWS_OP_SWIZZLE: + /* Recompute mask taking identity swizzle into account */ out->mask = 0; - MASK_SET(out->mask, 0, op->swizzle.in[0] != 0); - MASK_SET(out->mask, 1, op->swizzle.in[1] != 1); - MASK_SET(out->mask, 2, op->swizzle.in[2] != 2); - MASK_SET(out->mask, 3, op->swizzle.in[3] != 3); - MASK_SET(out->swizzle, 0, op->swizzle.in[0]); - MASK_SET(out->swizzle, 1, op->swizzle.in[1]); - MASK_SET(out->swizzle, 2, op->swizzle.in[2]); - MASK_SET(out->swizzle, 3, op->swizzle.in[3]); + for (int i = 0; i < 4; i++) { + if (SWS_OP_NEEDED(op, i) && op->swizzle.in[i] != i) { + MASK_SET(out->mask, i, 1); + MASK_SET(out->swizzle, i, op->swizzle.in[i]); + } else { + MASK_SET(out->swizzle, i, 0xf); + } + } /* The element size and type don't matter. */ out->block_size = block_size * ff_sws_pixel_type_size(op->type); out->type = AARCH64_PIXEL_U8; -- 2.52.0 From 0960d2fb1c04405c76be1b29e4292d7ac7fe581f Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Sat, 13 Jun 2026 02:19:48 +0200 Subject: [PATCH 3/4] swscale/aarch64/ops: remove redundant linear combinations There is no easy optimization that can be triggered by knowing that the offset is exactly 1. This led to identical functions being instantiated for different params. Also simplified the AVRational comparisons a bit. 
--- libswscale/aarch64/ops_entries.c | 10 ++-------- libswscale/aarch64/ops_impl_conv.c | 5 +++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 3cd59f9c3d..3f6bd92b1a 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -349,14 +349,12 @@ { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000ffULL, 
.linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, @@ -365,10 +363,6 @@ { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, diff --git 
a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c index b0a286edb6..4e401527cd 100644 --- a/libswscale/aarch64/ops_impl_conv.c +++ b/libswscale/aarch64/ops_impl_conv.c @@ -214,10 +214,11 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, continue; MASK_SET(out->mask, i, 1); for (int j = 0; j < 5; j++) { + const AVRational k = op->lin.m[i][j]; int jj = linear_index_from_sws_op(j); - if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1))) + if (j < 4 && k.num == k.den) LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1); - else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1))) + else if (k.num != 0) LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X); } } -- 2.52.0 From 753079b80290d0f7482b41d5692cc044e6338d41 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Tue, 16 Jun 2026 16:34:52 +0200 Subject: [PATCH 4/4] swscale/aarch64/ops: mark more operations as type-invariant This prevents the generation of a few more duplicate functions (where there would be both f32 and u32 functions). 
--- libswscale/aarch64/ops_entries.c | 25 +++++++++++-------------- libswscale/aarch64/ops_impl_conv.c | 22 +++++++++++++++++----- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 3f6bd92b1a..e340d0086d 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -13,9 +13,9 @@ { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, @@ -31,9 +31,8 @@ { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = 
AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, @@ -50,9 +49,9 @@ { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, @@ -66,9 +65,8 @@ { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 
0x0111 }, { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, @@ -261,13 +259,12 @@ { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1101 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0101 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1000 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1010 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1011 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1101 }, -{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, -{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, 
.type = AARCH64_PIXEL_F32, .mask = 0x1000 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c index 4e401527cd..075569b3b9 100644 --- a/libswscale/aarch64/ops_impl_conv.c +++ b/libswscale/aarch64/ops_impl_conv.c @@ -149,11 +149,6 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, case 4: out->mask = 0x1111; break; }; break; - case AARCH64_SWS_OP_SWAP_BYTES: - /* Only the element size matters, not the type. */ - if (out->type == AARCH64_PIXEL_F32) - out->type = AARCH64_PIXEL_U32; - break; case AARCH64_SWS_OP_SWIZZLE: /* Recompute mask taking identity swizzle into account */ out->mask = 0; @@ -238,5 +233,22 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, break; } + switch (out->op) { + case AARCH64_SWS_OP_READ_BIT: + case AARCH64_SWS_OP_READ_NIBBLE: + case AARCH64_SWS_OP_READ_PACKED: + case AARCH64_SWS_OP_READ_PLANAR: + case AARCH64_SWS_OP_WRITE_BIT: + case AARCH64_SWS_OP_WRITE_NIBBLE: + case AARCH64_SWS_OP_WRITE_PACKED: + case AARCH64_SWS_OP_WRITE_PLANAR: + case AARCH64_SWS_OP_SWAP_BYTES: + case AARCH64_SWS_OP_CLEAR: + /* Only the element size matters, not the type. */ + if (out->type == AARCH64_PIXEL_F32) + out->type = AARCH64_PIXEL_U32; + break; + } + return 0; } -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
