PR #23412 opened by Ramiro Polla (ramiro) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23412 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23412.patch
Similarly to c29465bcb6 for x86, use a plain call/ret pair instead of awkwardly exporting and then jumping back to the return label. Then also remove `AARCH64_SWS_OP_PROCESS` from `SwsAArch64OpType`. There was no good reason to have it there. >From 7c49e70850db969bb75aedd5d41828540c9ec249 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Wed, 3 Jun 2026 19:40:06 +0200 Subject: [PATCH 1/5] swscale/tests/sws_ops_aarch64: fix skipping of scaling ops Scaling ops were added to ff_sws_enum_op_lists() in 1d841635. But the code that skipped scaling ops in convert_to_aarch64_impl() wasn't taking into consideration that, in sws_ops_aarch64, the scaling ops aren't folded into read ops. Also updates libswscale/aarch64/ops_entries.c with the new entries. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops_entries.c | 7 +++++++ libswscale/aarch64/ops_impl_conv.c | 3 +++ libswscale/tests/sws_ops_aarch64.c | 2 ++ 3 files changed, 12 insertions(+) diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 61ff8bf760..70aad8ae89 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -116,6 +116,7 @@ { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, { .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = 
AARCH64_PIXEL_U8, .mask = 0x1111 }, @@ -254,6 +255,7 @@ { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, { .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, @@ -262,6 +264,8 @@ { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, @@ -316,6 +320,7 @@ { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_MAX, .block_size = 8, 
.type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, { .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, @@ -375,9 +380,11 @@ { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5fff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf000, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_DITHER, 
.dither.y_offset = 0xf203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, diff --git a/libswscale/aarch64/ops_impl_conv.c b/libswscale/aarch64/ops_impl_conv.c index 48504dc671..a66b91b6fb 100644 --- a/libswscale/aarch64/ops_impl_conv.c +++ b/libswscale/aarch64/ops_impl_conv.c @@ -124,6 +124,9 @@ static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, case SWS_OP_SCALE: out->op = AARCH64_SWS_OP_SCALE; break; case SWS_OP_LINEAR: out->op = AARCH64_SWS_OP_LINEAR; break; case SWS_OP_DITHER: out->op = AARCH64_SWS_OP_DITHER; break; + case SWS_OP_FILTER_H: + case SWS_OP_FILTER_V: + return AVERROR(ENOTSUP); } switch (out->op) { diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c index 21948ca71b..ca6279e8cf 100644 --- a/libswscale/tests/sws_ops_aarch64.c +++ b/libswscale/tests/sws_ops_aarch64.c @@ -118,6 +118,8 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) for (int i = 0; i < rest.num_ops; i++) { SwsAArch64OpImplParams params = { 0 }; ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params); + if (ret == AVERROR(ENOTSUP)) + continue; if (ret < 0) goto end; ret = aarch64_collect_op(&params, root); -- 2.52.0 >From b6c92ed01e9b1272724ca915513273a1bded0f8a Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Mon, 13 Apr 2026 15:14:29 +0200 Subject: [PATCH 2/5] swscale/aarch64/rasm: split conditional and unconditional branch instructions Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/rasm.h | 36 +++++++++++++++++---------------- libswscale/aarch64/rasm_print.c | 3 ++- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/libswscale/aarch64/rasm.h b/libswscale/aarch64/rasm.h index 5a14d8cd64..a91fc3f291 100644 --- a/libswscale/aarch64/rasm.h +++ b/libswscale/aarch64/rasm.h @@ -248,6 +248,7 @@ typedef enum AArch64InsnId { AARCH64_INSN_ADR, AARCH64_INSN_AND, AARCH64_INSN_B, + 
AARCH64_INSN_BCOND, AARCH64_INSN_BR, AARCH64_INSN_CMP, AARCH64_INSN_CSEL, @@ -537,7 +538,8 @@ static inline RasmOp a64cond_nv(void) { return a64op_cond(AARCH64_COND_NV); } #define i_addv(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_ADDV, op0, op1, OPN, OPN) #define i_adr(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_ADR, op0, op1, OPN, OPN) #define i_and(rctx, op0, op1, op2 ) rasm_add_insn(rctx, AARCH64_INSN_AND, op0, op1, op2, OPN) -#define i_b(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_B, op0, op1, OPN, OPN) +#define i_b(rctx, op0 ) rasm_add_insn(rctx, AARCH64_INSN_B, op0, OPN, OPN, OPN) +#define i_bcond(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_BCOND, op0, op1, OPN, OPN) #define i_br(rctx, op0 ) rasm_add_insn(rctx, AARCH64_INSN_BR, op0, OPN, OPN, OPN) #define i_cmp(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_CMP, op0, op1, OPN, OPN) #define i_csel(rctx, op0, op1, op2, op3) rasm_add_insn(rctx, AARCH64_INSN_CSEL, op0, op1, op2, op3) @@ -592,22 +594,22 @@ static inline RasmOp a64cond_nv(void) { return a64op_cond(AARCH64_COND_NV); } #define i_zip2(rctx, op0, op1, op2 ) rasm_add_insn(rctx, AARCH64_INSN_ZIP2, op0, op1, op2, OPN) /* Branch helpers. 
*/ -#define i_beq(rctx, id) i_b(rctx, a64cond_eq(), rasm_op_label(id)) -#define i_bne(rctx, id) i_b(rctx, a64cond_ne(), rasm_op_label(id)) -#define i_bhs(rctx, id) i_b(rctx, a64cond_hs(), rasm_op_label(id)) -#define i_bcs(rctx, id) i_b(rctx, a64cond_cs(), rasm_op_label(id)) -#define i_blo(rctx, id) i_b(rctx, a64cond_lo(), rasm_op_label(id)) -#define i_bcc(rctx, id) i_b(rctx, a64cond_cc(), rasm_op_label(id)) -#define i_bmi(rctx, id) i_b(rctx, a64cond_mi(), rasm_op_label(id)) -#define i_bpl(rctx, id) i_b(rctx, a64cond_pl(), rasm_op_label(id)) -#define i_bvs(rctx, id) i_b(rctx, a64cond_vs(), rasm_op_label(id)) -#define i_bvc(rctx, id) i_b(rctx, a64cond_vc(), rasm_op_label(id)) -#define i_bhi(rctx, id) i_b(rctx, a64cond_hi(), rasm_op_label(id)) -#define i_bls(rctx, id) i_b(rctx, a64cond_ls(), rasm_op_label(id)) -#define i_bge(rctx, id) i_b(rctx, a64cond_ge(), rasm_op_label(id)) -#define i_blt(rctx, id) i_b(rctx, a64cond_lt(), rasm_op_label(id)) -#define i_bgt(rctx, id) i_b(rctx, a64cond_gt(), rasm_op_label(id)) -#define i_ble(rctx, id) i_b(rctx, a64cond_le(), rasm_op_label(id)) +#define i_beq(rctx, id) i_bcond(rctx, a64cond_eq(), rasm_op_label(id)) +#define i_bne(rctx, id) i_bcond(rctx, a64cond_ne(), rasm_op_label(id)) +#define i_bhs(rctx, id) i_bcond(rctx, a64cond_hs(), rasm_op_label(id)) +#define i_bcs(rctx, id) i_bcond(rctx, a64cond_cs(), rasm_op_label(id)) +#define i_blo(rctx, id) i_bcond(rctx, a64cond_lo(), rasm_op_label(id)) +#define i_bcc(rctx, id) i_bcond(rctx, a64cond_cc(), rasm_op_label(id)) +#define i_bmi(rctx, id) i_bcond(rctx, a64cond_mi(), rasm_op_label(id)) +#define i_bpl(rctx, id) i_bcond(rctx, a64cond_pl(), rasm_op_label(id)) +#define i_bvs(rctx, id) i_bcond(rctx, a64cond_vs(), rasm_op_label(id)) +#define i_bvc(rctx, id) i_bcond(rctx, a64cond_vc(), rasm_op_label(id)) +#define i_bhi(rctx, id) i_bcond(rctx, a64cond_hi(), rasm_op_label(id)) +#define i_bls(rctx, id) i_bcond(rctx, a64cond_ls(), rasm_op_label(id)) +#define i_bge(rctx, id) i_bcond(rctx, 
a64cond_ge(), rasm_op_label(id)) +#define i_blt(rctx, id) i_bcond(rctx, a64cond_lt(), rasm_op_label(id)) +#define i_bgt(rctx, id) i_bcond(rctx, a64cond_gt(), rasm_op_label(id)) +#define i_ble(rctx, id) i_bcond(rctx, a64cond_le(), rasm_op_label(id)) /* Extra helpers. */ #define i_mov16b(rctx, op0, op1) i_mov(rctx, v_16b(op0), v_16b(op1)) diff --git a/libswscale/aarch64/rasm_print.c b/libswscale/aarch64/rasm_print.c index 86f543b3c9..8f55d87401 100644 --- a/libswscale/aarch64/rasm_print.c +++ b/libswscale/aarch64/rasm_print.c @@ -271,6 +271,7 @@ static const char insn_names[AARCH64_INSN_NB][8] = { [AARCH64_INSN_ADR ] = "adr", [AARCH64_INSN_AND ] = "and", [AARCH64_INSN_B ] = "b", + [AARCH64_INSN_BCOND ] = "b", [AARCH64_INSN_BR ] = "br", [AARCH64_INSN_CMP ] = "cmp", [AARCH64_INSN_CSEL ] = "csel", @@ -342,7 +343,7 @@ static void print_node_insn(const RasmContext *rctx, indent_to(fp, pos, line_start, INSTR_INDENT); int op_start = 0; - if (node->insn.id == AARCH64_INSN_B && rasm_op_type(node->insn.op[0]) == AARCH64_OP_COND) { + if (node->insn.id == AARCH64_INSN_BCOND) { pos_fprintf(fp, pos, "b.%-14s", cond_name(a64op_cond_val(node->insn.op[0]))); op_start = 1; } else if (rasm_op_type(node->insn.op[0]) == RASM_OP_NONE) { -- 2.52.0 >From 9186784764da793be932b1b16548ee5871024831 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Mon, 13 Apr 2026 15:16:35 +0200 Subject: [PATCH 3/5] swscale/aarch64/rasm: add blr instruction And a64op_lr() helper for LR register. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/rasm.h | 3 +++ libswscale/aarch64/rasm_print.c | 1 + 2 files changed, 4 insertions(+) diff --git a/libswscale/aarch64/rasm.h b/libswscale/aarch64/rasm.h index a91fc3f291..2ced8d0e95 100644 --- a/libswscale/aarch64/rasm.h +++ b/libswscale/aarch64/rasm.h @@ -249,6 +249,7 @@ typedef enum AArch64InsnId { AARCH64_INSN_AND, AARCH64_INSN_B, AARCH64_INSN_BCOND, + AARCH64_INSN_BLR, AARCH64_INSN_BR, AARCH64_INSN_CMP, AARCH64_INSN_CSEL, @@ -351,6 +352,7 @@ static inline uint8_t a64op_gpr_size(RasmOp op) { return op.u8[1]; } static inline RasmOp a64op_gpw(uint8_t n) { return a64op_make_gpr(n, sizeof(uint32_t)); } static inline RasmOp a64op_gpx(uint8_t n) { return a64op_make_gpr(n, sizeof(uint64_t)); } +static inline RasmOp a64op_lr (void) { return a64op_make_gpr(30, sizeof(uint64_t)); } static inline RasmOp a64op_sp (void) { return a64op_make_gpr(31, sizeof(uint64_t)); } /* modifiers */ @@ -540,6 +542,7 @@ static inline RasmOp a64cond_nv(void) { return a64op_cond(AARCH64_COND_NV); } #define i_and(rctx, op0, op1, op2 ) rasm_add_insn(rctx, AARCH64_INSN_AND, op0, op1, op2, OPN) #define i_b(rctx, op0 ) rasm_add_insn(rctx, AARCH64_INSN_B, op0, OPN, OPN, OPN) #define i_bcond(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_BCOND, op0, op1, OPN, OPN) +#define i_blr(rctx, op0 ) rasm_add_insn(rctx, AARCH64_INSN_BLR, op0, OPN, OPN, OPN) #define i_br(rctx, op0 ) rasm_add_insn(rctx, AARCH64_INSN_BR, op0, OPN, OPN, OPN) #define i_cmp(rctx, op0, op1 ) rasm_add_insn(rctx, AARCH64_INSN_CMP, op0, op1, OPN, OPN) #define i_csel(rctx, op0, op1, op2, op3) rasm_add_insn(rctx, AARCH64_INSN_CSEL, op0, op1, op2, op3) diff --git a/libswscale/aarch64/rasm_print.c b/libswscale/aarch64/rasm_print.c index 8f55d87401..ff870f8a27 100644 --- a/libswscale/aarch64/rasm_print.c +++ b/libswscale/aarch64/rasm_print.c @@ -272,6 +272,7 @@ static const char insn_names[AARCH64_INSN_NB][8] = { [AARCH64_INSN_AND ] = 
"and", [AARCH64_INSN_B ] = "b", [AARCH64_INSN_BCOND ] = "b", + [AARCH64_INSN_BLR ] = "blr", [AARCH64_INSN_BR ] = "br", [AARCH64_INSN_CMP ] = "cmp", [AARCH64_INSN_CSEL ] = "csel", -- 2.52.0 >From 09f687d03db4f81de773980f2d663b3a3c3117a0 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Mon, 13 Apr 2026 15:28:32 +0200 Subject: [PATCH 4/5] swscale/aarch64/ops: use plain `ret` instruction Use a call/ret pair instead of awkwardly exporting and then jumping back to the return label. This is similar to c29465bcb6, but for aarch64. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops.c | 12 +-- libswscale/aarch64/ops_asmgen.c | 124 ++++++++++++++++------------- libswscale/aarch64/ops_entries.c | 4 - libswscale/aarch64/ops_impl.c | 3 - libswscale/aarch64/ops_impl.h | 1 - libswscale/tests/sws_ops_aarch64.c | 7 +- 6 files changed, 73 insertions(+), 78 deletions(-) diff --git a/libswscale/aarch64/ops.c b/libswscale/aarch64/ops.c index 4598a8db6b..c9d0ef58f1 100644 --- a/libswscale/aarch64/ops.c +++ b/libswscale/aarch64/ops.c @@ -220,7 +220,7 @@ static int aarch64_compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out) goto error; } - /* Look up process/process_return functions. */ + /* Look up process function. */ const SwsOp *read = ff_sws_op_list_input(&rest); const SwsOp *write = ff_sws_op_list_output(&rest); const int read_planes = read ? (read->rw.packed ? 
1 : read->rw.elems) : 0; @@ -229,19 +229,13 @@ static int aarch64_compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out) for (int i = 0; i < FFMAX(read_planes, write_planes); i++) MASK_SET(mask, i, 1); - SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; - SwsAArch64OpImplParams return_params = { .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = mask }; + SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params); - SwsFuncPtr return_func = ff_sws_aarch64_lookup(&return_params); - if (!process_func || !return_func) { + if (!process_func) { ret = AVERROR(ENOTSUP); goto error; } - ret = ff_sws_op_chain_append(chain, return_func, NULL, &(SwsOpPriv) { 0 }); - if (ret < 0) - goto error; - out->func = (SwsOpFunc) process_func; out->cpu_flags = chain->cpu_flags; diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c index e88a162de1..a1e379967d 100644 --- a/libswscale/aarch64/ops_asmgen.c +++ b/libswscale/aarch64/ops_asmgen.c @@ -260,14 +260,14 @@ static void asmgen_epilogue(SwsAArch64Context *s, const RasmOp *regs, unsigned n } /*********************************************************************/ -/* Callee-saved registers (r19-r28). */ -#define MAX_SAVED_REGS 10 +/* Callee-saved registers (r19-r28, fp, and lr). 
*/ +#define MAX_SAVED_REGS 12 static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr) { const int n = a64op_gpr_n(gpr); - if (n >= 19 && n <= 28) + if (n >= 19 && n <= 30) regs[(*count)++] = gpr; } @@ -276,6 +276,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s, RasmOp regs[MAX_SAVED_REGS]) { unsigned count = 0; + clobber_gpr(regs, &count, a64op_lr()); LOOP_MASK(p, i) { clobber_gpr(regs, &count, s->in[i]); clobber_gpr(regs, &count, s->out[i]); @@ -292,9 +293,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p char buf[64]; /** - * The process/process_return functions for aarch64 work similarly - * to the x86 backend. The description in x86/ops_common.asm mostly - * holds as well here. + * The process function for aarch64 works similarly to the x86 backend. + * The description in x86/ops_common.asm mostly holds as well here. */ aarch64_op_impl_func_name(func_name, sizeof(func_name), p); @@ -329,49 +329,38 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t)))); } - /* Reset x and jump to first kernel. */ - i_mov(r, s->bx, s->bx_start); CMT("bx = bx_start;"); - i_mov(r, s->impl, s->op1_impl); CMT("impl = op1_impl;"); - i_br (r, s->op0_func); CMT("jump to op0_func"); -} + int first_row = rasm_new_label(r, NULL); + int next_row = rasm_new_label(r, NULL); + int next_block = rasm_new_label(r, NULL); -static void asmgen_process_return(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - RasmContext *r = s->rctx; - char func_name[128]; + /* Jump to first row (skips padding). */ + i_b (r, rasm_op_label(first_row)); CMT("goto first_row;"); - aarch64_op_impl_func_name(func_name, sizeof(func_name), p); - - rasm_func_begin(r, func_name, true, true); - - /* Reset impl to first kernel. */ - i_mov(r, s->impl, s->op1_impl); CMT("impl = op1_impl;"); - - /* Perform horizontal loop. 
*/ - int loop = rasm_new_label(r, NULL); - i_add(r, s->bx, s->bx, IMM(1)); CMT("bx += 1;"); - i_cmp(r, s->bx, s->bx_end); CMT("if (bx != bx_end)"); - i_bne(r, loop); CMT(" goto loop;"); - - /* Perform vertical loop. */ - int end = rasm_new_label(r, NULL); - i_add(r, s->y, s->y, IMM(1)); CMT("y += 1;"); - i_cmp(r, s->y, s->y_end); CMT("if (y == y_end)"); - i_beq(r, end); CMT(" goto end;"); - - /* Perform padding and reset x, preparing for next row. */ + /* Perform padding, preparing for next row. */ + rasm_add_label(r, next_row); CMT("next_row:"); LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } + + /* First row (reset x). */ + rasm_add_label(r, first_row); CMT("first_row:"); i_mov(r, s->bx, s->bx_start); CMT("bx = bx_start;"); - /* Loop back or end of function. */ - rasm_add_label(r, loop); CMT("loop:"); - i_br (r, s->op0_func); CMT("jump to op0_func"); - rasm_add_label(r, end); CMT("end:"); + /* Reset impl and call first kernel. */ + rasm_add_label(r, next_block); CMT("next_block:"); + i_mov(r, s->impl, s->op1_impl); CMT("impl = op1_impl;"); + i_blr(r, s->op0_func); CMT("op0_func();"); + + /* Perform horizontal loop. */ + i_add(r, s->bx, s->bx, IMM(1)); CMT("bx += 1;"); + i_cmp(r, s->bx, s->bx_end); CMT("if (bx != bx_end)"); + i_bne(r, next_block); CMT(" goto next_block;"); + + /* Perform vertical loop. 
*/ + i_add(r, s->y, s->y, IMM(1)); CMT("y += 1;"); + i_cmp(r, s->y, s->y_end); CMT("if (y != y_end)"); + i_bne(r, next_row); CMT(" goto next_row;"); /* Function epilogue */ - RasmOp saved_regs[MAX_SAVED_REGS]; - unsigned nsaved = clobbered_gprs(s, p, saved_regs); if (nsaved) asmgen_epilogue(s, saved_regs, nsaved); @@ -1367,9 +1356,28 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) { RasmContext *r = s->rctx; + bool is_read = false; + bool is_write = false; + switch (p->op) { + case AARCH64_SWS_OP_READ_BIT: + case AARCH64_SWS_OP_READ_NIBBLE: + case AARCH64_SWS_OP_READ_PACKED: + case AARCH64_SWS_OP_READ_PLANAR: + is_read = true; + break; + case AARCH64_SWS_OP_WRITE_BIT: + case AARCH64_SWS_OP_WRITE_NIBBLE: + case AARCH64_SWS_OP_WRITE_PACKED: + case AARCH64_SWS_OP_WRITE_PLANAR: + is_write = true; + break; + default: + break; + } + char func_name[128]; aarch64_op_impl_func_name(func_name, sizeof(func_name), p); - rasm_func_begin(r, func_name, true, true); + rasm_func_begin(r, func_name, true, !is_read); /** * Set up vector register dimensions and reshape all vectors @@ -1416,14 +1424,18 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) break; } - /* Load continuation address and increment impl pointer. */ - RasmNode *node = rasm_set_current_node(r, s->load_cont_node); - RasmOp impl_post = a64op_post(s->impl, sizeof_impl); - i_ldr(r, s->cont, impl_post); CMT("SwsFuncPtr cont = (impl++)->cont;"); - rasm_set_current_node(r, node); - - /* Common end for CPS functions. */ - i_br (r, s->cont); CMT("jump to cont"); + if (is_write) { + /* Write functions return directly. */ + i_ret(r); + } else { + /* Load continuation address and increment impl pointer. 
*/ + RasmNode *node = rasm_set_current_node(r, s->load_cont_node); + RasmOp impl_post = a64op_post(s->impl, sizeof_impl); + i_ldr(r, s->cont, impl_post); CMT("SwsFuncPtr cont = (impl++)->cont;"); + rasm_set_current_node(r, node); + /* Common end for remaining CPS functions. */ + i_br (r, s->cont); CMT("jump to cont"); + } } static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) @@ -1432,9 +1444,6 @@ static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) case AARCH64_SWS_OP_PROCESS: asmgen_process(s, p); break; - case AARCH64_SWS_OP_PROCESS_RETURN: - asmgen_process_return(s, p); - break; default: asmgen_op_cps(s, p); break; @@ -1561,9 +1570,11 @@ static int asmgen(void) /** * The entry point of the SwsOpFunc is the `process` function. The + * first kernel function is called from `process`, and subsequent * kernel functions are chained by directly branching to the next - * operation, using a continuation-passing style design. The exit - * point of the SwsOpFunc is the `process_return` function. + * operation, using a continuation-passing style design. The last + * operation must be a write operation, which returns from the call + * to the `process` function. * * The GPRs used by the entire call-chain are listed below. * @@ -1586,6 +1597,9 @@ static int asmgen(void) * The read/write data pointers and padding values first use up the * remaining free caller-saved registers, and only then are the * caller-saved registers (r19-r28) used. + * + * The Link Register (r30) is used when calling the first kernel, + * so it must be saved. */ /* SwsOpFunc arguments. 
*/ diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 70aad8ae89..ae30ca8b57 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -7,10 +7,6 @@ { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c index f7e7b18dcf..26d6a8d954 100644 --- a/libswscale/aarch64/ops_impl.c +++ b/libswscale/aarch64/ops_impl.c @@ -77,7 +77,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt) static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", - [AARCH64_SWS_OP_PROCESS_RETURN] = "AARCH64_SWS_OP_PROCESS_RETURN", [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", [AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", @@ -114,7 +113,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op) static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = { [AARCH64_SWS_OP_NONE ] = "none", [AARCH64_SWS_OP_PROCESS ] = "process", - [AARCH64_SWS_OP_PROCESS_RETURN] = "process_return", [AARCH64_SWS_OP_READ_BIT ] = "read_bit", [AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble", [AARCH64_SWS_OP_READ_PACKED ] = "read_packed", @@ -326,7 +324,6 @@ static const 
ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2) #define MAX_LEVELS 8 static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, - [AARCH64_SWS_OP_PROCESS_RETURN] = { &field_op, &field_mask }, [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h index 67c4672812..f0bbc9f697 100644 --- a/libswscale/aarch64/ops_impl.h +++ b/libswscale/aarch64/ops_impl.h @@ -38,7 +38,6 @@ typedef enum SwsAArch64PixelType { typedef enum SwsAArch64OpType { AARCH64_SWS_OP_NONE = 0, AARCH64_SWS_OP_PROCESS, - AARCH64_SWS_OP_PROCESS_RETURN, AARCH64_SWS_OP_READ_BIT, AARCH64_SWS_OP_READ_NIBBLE, AARCH64_SWS_OP_READ_PACKED, diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c index ca6279e8cf..84300c6af4 100644 --- a/libswscale/tests/sws_ops_aarch64.c +++ b/libswscale/tests/sws_ops_aarch64.c @@ -72,7 +72,7 @@ error: return ret; } -/* Collect the parameters for the process/process_return functions. */ +/* Collect the parameters for the process function. 
*/ static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) { const SwsOp *read = ff_sws_op_list_input(ops); @@ -89,11 +89,6 @@ static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **roo .mask = mask, }; - ret = aarch64_collect_op(&params, root); - if (ret < 0) - return ret; - - params.op = AARCH64_SWS_OP_PROCESS_RETURN; ret = aarch64_collect_op(&params, root); if (ret < 0) return ret; -- 2.52.0 >From 1f87adf7b90591f8817ce8be17896ce8fdd4709e Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Mon, 8 Jun 2026 21:10:03 +0200 Subject: [PATCH 5/5] swscale/aarch64/ops: simplify process function generation There was no good reason to have it as an SwsAArch64OpType. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops.c | 22 +++++++++------- libswscale/aarch64/ops_asmgen.c | 42 +++++++++++++----------------- libswscale/aarch64/ops_entries.c | 4 --- libswscale/aarch64/ops_impl.c | 3 --- libswscale/aarch64/ops_impl.h | 1 - libswscale/tests/sws_ops_aarch64.c | 28 -------------------- 6 files changed, 30 insertions(+), 70 deletions(-) diff --git a/libswscale/aarch64/ops.c b/libswscale/aarch64/ops.c index c9d0ef58f1..366f23efdf 100644 --- a/libswscale/aarch64/ops.c +++ b/libswscale/aarch64/ops.c @@ -221,22 +221,24 @@ static int aarch64_compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out) } /* Look up process function. */ + void ff_sws_process_0001_neon(void); + void ff_sws_process_0011_neon(void); + void ff_sws_process_0111_neon(void); + void ff_sws_process_1111_neon(void); + const SwsOp *read = ff_sws_op_list_input(&rest); const SwsOp *write = ff_sws_op_list_output(&rest); const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0; const int write_planes = write->rw.packed ? 
1 : write->rw.elems; - SwsAArch64OpMask mask = 0; - for (int i = 0; i < FFMAX(read_planes, write_planes); i++) - MASK_SET(mask, i, 1); - - SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; - SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params); - if (!process_func) { - ret = AVERROR(ENOTSUP); - goto error; + SwsOpFunc process_func = NULL; + switch (FFMAX(read_planes, write_planes)) { + case 1: process_func = (SwsOpFunc) ff_sws_process_0001_neon; break; + case 2: process_func = (SwsOpFunc) ff_sws_process_0011_neon; break; + case 3: process_func = (SwsOpFunc) ff_sws_process_0111_neon; break; + case 4: process_func = (SwsOpFunc) ff_sws_process_1111_neon; break; } - out->func = (SwsOpFunc) process_func; + out->func = process_func; out->cpu_flags = chain->cpu_flags; error: diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c index a1e379967d..2c043dad65 100644 --- a/libswscale/aarch64/ops_asmgen.c +++ b/libswscale/aarch64/ops_asmgen.c @@ -272,12 +272,12 @@ static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, } static unsigned clobbered_gprs(const SwsAArch64Context *s, - const SwsAArch64OpImplParams *p, + SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS]) { unsigned count = 0; clobber_gpr(regs, &count, a64op_lr()); - LOOP_MASK(p, i) { + LOOP(mask, i) { clobber_gpr(regs, &count, s->in[i]); clobber_gpr(regs, &count, s->out[i]); clobber_gpr(regs, &count, s->in_bump[i]); @@ -286,7 +286,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s, return count; } -static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) +static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask) { RasmContext *r = s->rctx; char func_name[128]; @@ -297,13 +297,13 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p * The description in x86/ops_common.asm mostly holds as well here. 
*/ - aarch64_op_impl_func_name(func_name, sizeof(func_name), p); + snprintf(func_name, sizeof(func_name), "ff_sws_process_%04x_neon", mask); rasm_func_begin(r, func_name, true, false); /* Function prologue */ RasmOp saved_regs[MAX_SAVED_REGS]; - unsigned nsaved = clobbered_gprs(s, p, saved_regs); + unsigned nsaved = clobbered_gprs(s, mask, saved_regs); if (nsaved) asmgen_prologue(s, saved_regs, nsaved); @@ -312,19 +312,19 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p i_add(r, s->op1_impl, s->impl, IMM(sizeof_impl)); CMT("SwsOpImpl *op1_impl = impl + 1;"); /* Load values from exec. */ - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "in[%u] = exec->in[%u];", i, i); i_ldr(r, s->in[i], a64op_off(s->exec, offsetof_exec_in + (i * sizeof(uint8_t *)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "out[%u] = exec->out[%u];", i, i); i_ldr(r, s->out[i], a64op_off(s->exec, offsetof_exec_out + (i * sizeof(uint8_t *)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "in_bump[%u] = exec->in_bump[%u];", i, i); i_ldr(r, s->in_bump[i], a64op_off(s->exec, offsetof_exec_in_bump + (i * sizeof(ptrdiff_t)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "out_bump[%u] = exec->out_bump[%u];", i, i); i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t)))); } @@ -338,8 +338,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p /* Perform padding, preparing for next row. 
*/ rasm_add_label(r, next_row); CMT("next_row:"); - LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } - LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } + LOOP(mask, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } + LOOP(mask, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } /* First row (reset x). */ rasm_add_label(r, first_row); CMT("first_row:"); @@ -1438,18 +1438,6 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) } } -static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - switch (p->op) { - case AARCH64_SWS_OP_PROCESS: - asmgen_process(s, p); - break; - default: - asmgen_op_cps(s, p); - break; - } -} - /*********************************************************************/ static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str) @@ -1641,10 +1629,16 @@ static int asmgen(void) s.in_bump [3] = a64op_gpx(26); s.out_bump[3] = a64op_gpx(27); + /* Generate all process functions using rasm. */ + asmgen_process(&s, 0x0001); + asmgen_process(&s, 0x0011); + asmgen_process(&s, 0x0111); + asmgen_process(&s, 0x1111); + /* Generate all functions from ops_entries.c using rasm. 
*/ const SwsAArch64OpImplParams *params = impl_params; while (params->op) { - asmgen_op(&s, params++); + asmgen_op_cps(&s, params++); if (rctx->error) { ret = rctx->error; goto error; diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index ae30ca8b57..04a665a9f1 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -3,10 +3,6 @@ * To regenerate, run: make sws_ops_entries_aarch64 */ -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c index 26d6a8d954..d5be4563c6 100644 --- a/libswscale/aarch64/ops_impl.c +++ b/libswscale/aarch64/ops_impl.c @@ -76,7 +76,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt) /*********************************************************************/ static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", - [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", [AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", @@ -112,7 +111,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op) static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = { [AARCH64_SWS_OP_NONE ] = "none", - [AARCH64_SWS_OP_PROCESS ] = "process", [AARCH64_SWS_OP_READ_BIT ] = "read_bit", [AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble", [AARCH64_SWS_OP_READ_PACKED ] = "read_packed", @@ -323,7 +321,6 @@ static 
const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2) /* Fields needed to uniquely identify each SwsAArch64OpType. */ #define MAX_LEVELS 8 static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { - [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h index f0bbc9f697..9ccacc60e7 100644 --- a/libswscale/aarch64/ops_impl.h +++ b/libswscale/aarch64/ops_impl.h @@ -37,7 +37,6 @@ typedef enum SwsAArch64PixelType { /* Similar to SwsOpType */ typedef enum SwsAArch64OpType { AARCH64_SWS_OP_NONE = 0, - AARCH64_SWS_OP_PROCESS, AARCH64_SWS_OP_READ_BIT, AARCH64_SWS_OP_READ_NIBBLE, AARCH64_SWS_OP_READ_PACKED, diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c index 84300c6af4..4fa10c7bb0 100644 --- a/libswscale/tests/sws_ops_aarch64.c +++ b/libswscale/tests/sws_ops_aarch64.c @@ -72,30 +72,6 @@ error: return ret; } -/* Collect the parameters for the process function. */ -static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) -{ - const SwsOp *read = ff_sws_op_list_input(ops); - const SwsOp *write = ff_sws_op_list_output(ops); - const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0; - const int write_planes = write->rw.packed ? 
1 : write->rw.elems; - int ret; - - SwsAArch64OpMask mask = 0; - for (int i = 0; i < FFMAX(read_planes, write_planes); i++) - MASK_SET(mask, i, 1); - SwsAArch64OpImplParams params = { - .op = AARCH64_SWS_OP_PROCESS, - .mask = mask, - }; - - ret = aarch64_collect_op(&params, root); - if (ret < 0) - return ret; - - return 0; -} - static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) { struct AVTreeNode **root = (struct AVTreeNode **) opaque; @@ -106,10 +82,6 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) /* Use at most two full vregs during the widest precision section */ int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16; - ret = aarch64_collect_process(&rest, root); - if (ret < 0) - return ret; - for (int i = 0; i < rest.num_ops; i++) { SwsAArch64OpImplParams params = { 0 }; ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params); -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
