PR #23578 opened by Niklas Haas (haasn) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23578 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23578.patch
Useful for tasks like uops macros generation, which does not depend on any specific order (the tree serializes the collected uops anyway). >From 7fea93f5e1ef5c23438c29e5905c4f8e498e3320 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Wed, 24 Jun 2026 02:53:23 +0200 Subject: [PATCH 1/2] swscale/ops: use slice threading for ff_sws_enum_op_lists() Enabled by the existing SwsContext.threads option. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops.c | 87 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/libswscale/ops.c b/libswscale/ops.c index 0f52f7a77c..add91a33af 100644 --- a/libswscale/ops.c +++ b/libswscale/ops.c @@ -1047,9 +1047,19 @@ void ff_sws_op_list_print(void *log, int lev, int lev_extra, #define DUMMY_SIZE 16 -static int enum_ops_fmt(SwsContext *ctx, void *opaque, - enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, - int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops)) +struct EnumFmtPriv { + SwsContext *ctx; + int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops); + void *opaque; + + /* for slice threading */ + enum AVPixelFormat src_start, src_end; + enum AVPixelFormat dst_start, dst_end; +}; + +static int enum_ops_fmt(const struct EnumFmtPriv *s, + enum AVPixelFormat src_fmt, + enum AVPixelFormat dst_fmt) { int ret = 0; SwsOpList *ops = NULL; @@ -1070,7 +1080,7 @@ static int enum_ops_fmt(SwsContext *ctx, void *opaque, dst.width = dst_sizes[i][0]; dst.height = dst_sizes[i][1]; - ret = ff_sws_op_list_generate(ctx, &src, &dst, &ops, &incomplete); + ret = ff_sws_op_list_generate(s->ctx, &src, &dst, &ops, &incomplete); if (ret == AVERROR(ENOTSUP)) return 0; /* silently skip unsupported formats */ else if (ret < 0) @@ -1080,7 +1090,7 @@ static int enum_ops_fmt(SwsContext *ctx, void *opaque, if (ret < 0) goto fail; - ret = cb(ctx, opaque, ops); + ret = s->cb(s->ctx, s->opaque, ops); if (ret < 0) goto fail; @@ -1092,31 +1102,60 @@ fail: return ret; } 
+static void enum_fmt_slice(void *priv, int jobnr, int threadnr, int nb_jobs, + int nb_threads) +{ + const struct EnumFmtPriv *s = priv; + const enum AVPixelFormat src = s->src_start + jobnr; + for (enum AVPixelFormat dst = s->dst_start; dst <= s->dst_end; dst++) { + int ret = enum_ops_fmt(s, src, dst); + if (ret < 0) + return; + } +} + +static enum AVPixelFormat last_pix_fmt(void) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_next(NULL); + while (1) { + const AVPixFmtDescriptor *next = av_pix_fmt_desc_next(desc); + if (!next) + return av_pix_fmt_desc_get_id(desc); + desc = next; + } +} + int ff_sws_enum_op_lists(SwsContext *ctx, void *opaque, enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops)) { - const AVPixFmtDescriptor *src_start = av_pix_fmt_desc_next(NULL); - const AVPixFmtDescriptor *dst_start = src_start; - if (src_fmt != AV_PIX_FMT_NONE) - src_start = av_pix_fmt_desc_get(src_fmt); - if (dst_fmt != AV_PIX_FMT_NONE) - dst_start = av_pix_fmt_desc_get(dst_fmt); + struct EnumFmtPriv s = { + .ctx = ctx, + .cb = cb, + .opaque = opaque, + }; - const AVPixFmtDescriptor *src, *dst; - for (src = src_start; src; src = av_pix_fmt_desc_next(src)) { - const enum AVPixelFormat src_f = av_pix_fmt_desc_get_id(src); - for (dst = dst_start; dst; dst = av_pix_fmt_desc_next(dst)) { - const enum AVPixelFormat dst_f = av_pix_fmt_desc_get_id(dst); - int ret = enum_ops_fmt(ctx, opaque, src_f, dst_f, cb); - if (ret < 0) - return ret; - if (dst_fmt != AV_PIX_FMT_NONE) - break; - } - if (src_fmt != AV_PIX_FMT_NONE) - break; + s.src_start = s.dst_start = AV_PIX_FMT_NONE + 1; + s.src_end = s.dst_end = last_pix_fmt(); + if (src_fmt != AV_PIX_FMT_NONE) + s.src_start = s.src_end = src_fmt; + if (dst_fmt != AV_PIX_FMT_NONE) + s.dst_start = s.dst_end = dst_fmt; + + const int nb_jobs = s.src_end - s.src_start + 1; + if (ctx->threads == 1) { + for (int j = 0; j < nb_jobs; j++) + enum_fmt_slice(&s, j, 0, nb_jobs, 1); + return 
0; } + AVSliceThread *slicethread; + int ret = avpriv_slicethread_create(&slicethread, &s, enum_fmt_slice, + NULL, ctx->threads); + if (ret < 0) + return ret; + + avpriv_slicethread_execute(slicethread, nb_jobs, 0); + avpriv_slicethread_free(&slicethread); return 0; } -- 2.52.0 >From c8d35817d556adfd9e0fb41914b549cd8baae088 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Wed, 24 Jun 2026 03:44:57 +0200 Subject: [PATCH 2/2] swscale/uops: use threading to speed up macros generation 26.74s -> 4.58s on my laptop Signed-off-by: Niklas Haas <[email protected]> --- libswscale/uops.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/libswscale/uops.c b/libswscale/uops.c index 096621466c..ca07aac6b8 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -23,6 +23,7 @@ #include "libavutil/avassert.h" #include "libavutil/mem.h" #include "libavutil/refstruct.h" +#include "libavutil/thread.h" #include "libavutil/tree.h" #include "ops.h" @@ -873,7 +874,12 @@ int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, return 0; } -static int register_uop(struct AVTreeNode **root, const SwsUOp *uop) +struct EnumPriv { + struct AVTreeNode *root; + pthread_mutex_t lock; +}; + +static int register_uop(struct EnumPriv *s, const SwsUOp *uop) { SwsUOp *key = av_memdup(uop, sizeof(*uop)); if (!key) @@ -886,7 +892,9 @@ static int register_uop(struct AVTreeNode **root, const SwsUOp *uop) return AVERROR(ENOMEM); } - av_tree_insert(root, key, ff_sws_uop_cmp_v, &node); + pthread_mutex_lock(&s->lock); + av_tree_insert(&s->root, key, ff_sws_uop_cmp_v, &node); + pthread_mutex_unlock(&s->lock); if (node) { av_free(node); av_free(key); @@ -904,9 +912,8 @@ static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags fla if (ret < 0) goto fail; - struct AVTreeNode **root = ctx->opaque; for (int i = 0; i < uops->num_ops; i++) { - ret = register_uop(root, &uops->ops[i]); + ret = 
register_uop(ctx->opaque, &uops->ops[i]); if (ret < 0) goto fail; } @@ -977,16 +984,20 @@ static int free_uop_key(void *opaque, void *key) int ff_sws_uops_macros_gen(char **out_str) { - int ret; - struct AVTreeNode *root = NULL; + struct EnumPriv s = {0}; + int ret = pthread_mutex_init(&s.lock, NULL); + if (ret) + return ret; AVBPrint bprint, *const bp = &bprint; av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED); /* Allocate dummy graph and context for ff_sws_compile_pass() */ SwsGraph *graph = ff_sws_graph_alloc(); - if (!graph) - return AVERROR(ENOMEM); + if (!graph) { + ret = AVERROR(ENOMEM); + goto fail; + } SwsContext *ctx = graph->ctx = sws_alloc_context(); if (!ctx) { @@ -994,9 +1005,10 @@ int ff_sws_uops_macros_gen(char **out_str) goto fail; } - /* Use this to plumb the tree state through all the layers of abstraction */ - ctx->opaque = &root; + /* Use this to plumb the enum state through all the layers of abstraction */ + ctx->opaque = &s; ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */ + ctx->threads = 0; /* use slice threading to speed up tree building */ /* Register all unique uops over every relevant combination of flags */ for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) { @@ -1023,7 +1035,7 @@ int ff_sws_uops_macros_gen(char **out_str) .mask = SWS_COMP_ELEMS(elems), }; - ret = register_uop(&root, &uop); + ret = register_uop(&s, &uop); if (ret < 0) goto fail; } @@ -1060,10 +1072,10 @@ int ff_sws_uops_macros_gen(char **out_str) const char *macro = uop_names[key.uop].macro; const char *prefix = pixel_types[key.type].prefix; av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro); - av_tree_enumerate(root, &key, enum_type, generate_entry_args); + av_tree_enumerate(s.root, &key, enum_type, generate_entry_args); av_bprintf(bp, "\n"); av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro); - av_tree_enumerate(root, &key, enum_type, generate_entry_struct); + av_tree_enumerate(s.root, &key, enum_type, 
generate_entry_struct); av_bprintf(bp, "\n"); } } @@ -1073,8 +1085,9 @@ int ff_sws_uops_macros_gen(char **out_str) fail: av_bprint_finalize(bp, NULL); - av_tree_enumerate(root, NULL, NULL, free_uop_key); - av_tree_destroy(root); + av_tree_enumerate(s.root, NULL, NULL, free_uop_key); + av_tree_destroy(s.root); + pthread_mutex_destroy(&s.lock); ff_sws_graph_free(&graph); sws_free_context(&ctx); return ret; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
