PR #23578 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23578
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23578.patch

Useful for tasks like uops macros generation, which do not depend on any
specific enumeration order (the tree serializes the collected uops anyway).


From 7fea93f5e1ef5c23438c29e5905c4f8e498e3320 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Wed, 24 Jun 2026 02:53:23 +0200
Subject: [PATCH 1/2] swscale/ops: use slice threading for
 ff_sws_enum_op_lists()

Enabled by the existing SwsContext.threads option.

Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops.c | 87 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 24 deletions(-)

diff --git a/libswscale/ops.c b/libswscale/ops.c
index 0f52f7a77c..add91a33af 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -1047,9 +1047,23 @@ void ff_sws_op_list_print(void *log, int lev, int lev_extra,
 
 #define DUMMY_SIZE 16
 
-static int enum_ops_fmt(SwsContext *ctx, void *opaque,
-                        enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt,
-                        int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops))
+/* State shared by all jobs of ff_sws_enum_op_lists() */
+struct EnumFmtPriv {
+    SwsContext *ctx;
+    int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops);
+    void *opaque;
+
+    /* for slice threading */
+    enum AVPixelFormat src_start, src_end;
+    enum AVPixelFormat dst_start, dst_end;
+
+    /* One slot per job, indexed by jobnr: 0, or that job's first error */
+    int *job_ret;
+};
+
+static int enum_ops_fmt(const struct EnumFmtPriv *s,
+                        enum AVPixelFormat src_fmt,
+                        enum AVPixelFormat dst_fmt)
 {
     int ret = 0;
     SwsOpList *ops = NULL;
@@ -1070,7 +1084,7 @@ static int enum_ops_fmt(SwsContext *ctx, void *opaque,
         dst.width  = dst_sizes[i][0];
         dst.height = dst_sizes[i][1];
 
-        ret = ff_sws_op_list_generate(ctx, &src, &dst, &ops, &incomplete);
+        ret = ff_sws_op_list_generate(s->ctx, &src, &dst, &ops, &incomplete);
         if (ret == AVERROR(ENOTSUP))
             return 0; /* silently skip unsupported formats */
         else if (ret < 0)
@@ -1080,7 +1094,7 @@ static int enum_ops_fmt(SwsContext *ctx, void *opaque,
         if (ret < 0)
             goto fail;
 
-        ret = cb(ctx, opaque, ops);
+        ret = s->cb(s->ctx, s->opaque, ops);
         if (ret < 0)
             goto fail;
 
@@ -1092,31 +1106,85 @@ fail:
     return ret;
 }
 
+/**
+ * Slice threading worker. Job `jobnr` handles the source format
+ * `src_start + jobnr` and walks every destination format for it. On the
+ * first error, the error is stored in `job_ret[jobnr]` and the job ends.
+ */
+static void enum_fmt_slice(void *priv, int jobnr, int threadnr, int nb_jobs,
+                           int nb_threads)
+{
+    const struct EnumFmtPriv *s = priv;
+    const enum AVPixelFormat src = s->src_start + jobnr;
+    for (enum AVPixelFormat dst = s->dst_start; dst <= s->dst_end; dst++) {
+        int ret = enum_ops_fmt(s, src, dst);
+        if (ret < 0) {
+            s->job_ret[jobnr] = ret;
+            return;
+        }
+    }
+}
+
+/* Returns the ID of the last descriptor reported by av_pix_fmt_desc_next() */
+static enum AVPixelFormat last_pix_fmt(void)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_next(NULL);
+    while (1) {
+        const AVPixFmtDescriptor *next = av_pix_fmt_desc_next(desc);
+        if (!next)
+            return av_pix_fmt_desc_get_id(desc);
+        desc = next;
+    }
+}
+
+/*
+ * One job per source format. Returns 0, or a negative error: a setup failure
+ * or else the error stored by the lowest-numbered failing job.
+ */
 int ff_sws_enum_op_lists(SwsContext *ctx, void *opaque,
                          enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt,
                          int (*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops))
 {
-    const AVPixFmtDescriptor *src_start = av_pix_fmt_desc_next(NULL);
-    const AVPixFmtDescriptor *dst_start = src_start;
-    if (src_fmt != AV_PIX_FMT_NONE)
-        src_start = av_pix_fmt_desc_get(src_fmt);
-    if (dst_fmt != AV_PIX_FMT_NONE)
-        dst_start = av_pix_fmt_desc_get(dst_fmt);
+    struct EnumFmtPriv s = {
+        .ctx    = ctx,
+        .cb     = cb,
+        .opaque = opaque,
+    };
 
-    const AVPixFmtDescriptor *src, *dst;
-    for (src = src_start; src; src = av_pix_fmt_desc_next(src)) {
-        const enum AVPixelFormat src_f = av_pix_fmt_desc_get_id(src);
-        for (dst = dst_start; dst; dst = av_pix_fmt_desc_next(dst)) {
-            const enum AVPixelFormat dst_f = av_pix_fmt_desc_get_id(dst);
-            int ret = enum_ops_fmt(ctx, opaque, src_f, dst_f, cb);
-            if (ret < 0)
-                return ret;
-            if (dst_fmt != AV_PIX_FMT_NONE)
-                break;
-        }
-        if (src_fmt != AV_PIX_FMT_NONE)
-            break;
+    s.src_start = s.dst_start = AV_PIX_FMT_NONE + 1;
+    s.src_end   = s.dst_end   = last_pix_fmt();
+    if (src_fmt != AV_PIX_FMT_NONE)
+        s.src_start = s.src_end = src_fmt;
+    if (dst_fmt != AV_PIX_FMT_NONE)
+        s.dst_start = s.dst_end = dst_fmt;
+
+    const int nb_jobs = s.src_end - s.src_start + 1;
+    s.job_ret = av_calloc(nb_jobs, sizeof(*s.job_ret));
+    if (!s.job_ret)
+        return AVERROR(ENOMEM);
+
+    int ret = 0;
+    if (ctx->threads == 1) {
+        /* Serial: stop at the first failure, like the loop this replaces */
+        for (int j = 0; j < nb_jobs && ret >= 0; j++) {
+            enum_fmt_slice(&s, j, 0, nb_jobs, 1);
+            ret = s.job_ret[j];
+        }
+    } else {
+        AVSliceThread *slicethread;
+        ret = avpriv_slicethread_create(&slicethread, &s, enum_fmt_slice,
+                                        NULL, ctx->threads);
+        if (ret >= 0) {
+            avpriv_slicethread_execute(slicethread, nb_jobs, 0);
+            avpriv_slicethread_free(&slicethread);
+
+            /* Report the lowest-numbered failing job, if any */
+            ret = 0;
+            for (int j = 0; j < nb_jobs && ret >= 0; j++)
+                ret = s.job_ret[j];
+        }
     }
 
-    return 0;
+    av_free(s.job_ret);
+    return ret;
 }
-- 
2.52.0


From c8d35817d556adfd9e0fb41914b549cd8baae088 Mon Sep 17 00:00:00 2001
From: Niklas Haas <[email protected]>
Date: Wed, 24 Jun 2026 03:44:57 +0200
Subject: [PATCH 2/2] swscale/uops: use threading to speed up macros generation

26.74s -> 4.58s on my laptop

Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/uops.c | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/libswscale/uops.c b/libswscale/uops.c
index 096621466c..ca07aac6b8 100644
--- a/libswscale/uops.c
+++ b/libswscale/uops.c
@@ -23,6 +23,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/refstruct.h"
+#include "libavutil/thread.h"
 #include "libavutil/tree.h"
 
 #include "ops.h"
@@ -873,7 +874,12 @@ int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops,
     return 0;
 }
 
-static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
+struct EnumPriv {
+    struct AVTreeNode *root; /* tree that register_uop() inserts uops into */
+    pthread_mutex_t lock;    /* held around av_tree_insert() on `root` */
+};
+
+static int register_uop(struct EnumPriv *s, const SwsUOp *uop)
 {
     SwsUOp *key = av_memdup(uop, sizeof(*uop));
     if (!key)
@@ -886,7 +892,9 @@ static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
         return AVERROR(ENOMEM);
     }
 
-    av_tree_insert(root, key, ff_sws_uop_cmp_v, &node);
+    pthread_mutex_lock(&s->lock);
+    av_tree_insert(&s->root, key, ff_sws_uop_cmp_v, &node);
+    pthread_mutex_unlock(&s->lock);
     if (node) {
         av_free(node);
         av_free(key);
@@ -904,9 +912,8 @@ static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags fla
     if (ret < 0)
         goto fail;
 
-    struct AVTreeNode **root = ctx->opaque;
     for (int i = 0; i < uops->num_ops; i++) {
-        ret = register_uop(root, &uops->ops[i]);
+        ret = register_uop(ctx->opaque, &uops->ops[i]);
         if (ret < 0)
             goto fail;
     }
@@ -977,16 +984,20 @@ static int free_uop_key(void *opaque, void *key)
 
 int ff_sws_uops_macros_gen(char **out_str)
 {
-    int ret;
-    struct AVTreeNode *root = NULL;
+    struct EnumPriv s = {0};
+    int ret = pthread_mutex_init(&s.lock, NULL);
+    if (ret)
+        return AVERROR(ret); /* pthread reports a positive errno value */
 
     AVBPrint bprint, *const bp = &bprint;
     av_bprint_init(bp, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     /* Allocate dummy graph and context for ff_sws_compile_pass() */
     SwsGraph *graph = ff_sws_graph_alloc();
-    if (!graph)
-        return AVERROR(ENOMEM);
+    if (!graph) { /* `ctx` is not initialized yet, so `fail` must be avoided */
+        pthread_mutex_destroy(&s.lock);
+        return AVERROR(ENOMEM);
+    }
 
     SwsContext *ctx = graph->ctx = sws_alloc_context();
     if (!ctx) {
@@ -994,9 +1005,10 @@ int ff_sws_uops_macros_gen(char **out_str)
         goto fail;
     }
 
-    /* Use this to plumb the tree state through all the layers of abstraction */
-    ctx->opaque = &root;
+    /* Use this to plumb the enum state through all the layers of abstraction */
+    ctx->opaque = &s;
     ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */
+    ctx->threads = 0; /* use slice threading to speed up tree building */
 
     /* Register all unique uops over every relevant combination of flags */
     for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) {
@@ -1023,7 +1035,7 @@ int ff_sws_uops_macros_gen(char **out_str)
                     .mask = SWS_COMP_ELEMS(elems),
                 };
 
-                ret = register_uop(&root, &uop);
+                ret = register_uop(&s, &uop);
                 if (ret < 0)
                     goto fail;
             }
@@ -1060,10 +1072,10 @@ int ff_sws_uops_macros_gen(char **out_str)
             const char *macro  = uop_names[key.uop].macro;
             const char *prefix = pixel_types[key.type].prefix;
             av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro);
-            av_tree_enumerate(root, &key, enum_type, generate_entry_args);
+            av_tree_enumerate(s.root, &key, enum_type, generate_entry_args);
             av_bprintf(bp, "\n");
             av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro);
-            av_tree_enumerate(root, &key, enum_type, generate_entry_struct);
+            av_tree_enumerate(s.root, &key, enum_type, generate_entry_struct);
             av_bprintf(bp, "\n");
         }
     }
@@ -1073,8 +1085,9 @@ int ff_sws_uops_macros_gen(char **out_str)
 
 fail:
     av_bprint_finalize(bp, NULL);
-    av_tree_enumerate(root, NULL, NULL, free_uop_key);
-    av_tree_destroy(root);
+    av_tree_enumerate(s.root, NULL, NULL, free_uop_key);
+    av_tree_destroy(s.root);
+    pthread_mutex_destroy(&s.lock);
     ff_sws_graph_free(&graph);
     sws_free_context(&ctx);
     return ret;
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to