This patch adds slice threading support to the blackframe filter.
It follows a map-reduce pattern: each job counts the below-threshold
samples in its own contiguous band of rows of the frame's first plane,
and the calling thread then sums the per-job counts.

Benchmarks:
Input: 4K Video (3840x2160, YUV420P), 250 frames, 100% black.
Command: ./ffmpeg -f lavfi -i color=c=black:s=3840x2160:d=10 -vf blackframe \
         -benchmark -f null -

Results (on 4-core system):
- Single-threaded: ~0.259s (real)
- Multi-threaded:  ~0.077s (real)
- Speedup:         ~3.36x

Signed-off-by: Raja Rathour <[email protected]>
From 230e819ab69f8022c4e6762b59b682a139ceaa0b Mon Sep 17 00:00:00 2001
From: Raja Rathour <[email protected]>
Date: Tue, 23 Dec 2025 19:48:46 +0530
Subject: [PATCH] avfilter/vf_blackframe: add slice threading

---
 libavfilter/vf_blackframe.c | 80 ++++++++++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/libavfilter/vf_blackframe.c b/libavfilter/vf_blackframe.c
index f0aa53e133..cb566d103d 100644
--- a/libavfilter/vf_blackframe.c
+++ b/libavfilter/vf_blackframe.c
@@ -32,6 +32,7 @@
 
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
+#include "libavutil/mem.h"
 #include "avfilter.h"
 #include "filters.h"
 #include "video.h"
@@ -45,6 +46,14 @@ typedef struct BlackFrameContext {
     unsigned int last_keyframe; ///< frame number of the last received key-frame
 } BlackFrameContext;
 
+typedef struct ThreadData { // Per-frame job parameters handed to blackframe_slice
+    const uint8_t *data; // Start of the plane to scan (filled from frame->data[0])
+    int linesize;        // Byte offset between the starts of consecutive rows
+    int bthresh;         // Samples strictly below this value are counted as black
+    int width;           // Number of samples examined in each row
+    unsigned int *counts; // Per-job results: job N writes only counts[N]
+} ThreadData;
+
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NV12,
     AV_PIX_FMT_NV21, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV411P,
@@ -55,22 +64,72 @@ static const enum AVPixelFormat pix_fmts[] = {
     snprintf(buf, sizeof(buf), format, value);  \
     av_dict_set(metadata, key, buf, 0)
 
+static int blackframe_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ThreadData *td = arg; // job parameters passed as the arg of ff_filter_execute
+    // Rows [slice_start, slice_end) of the input link's height belong to job jobnr
+    int slice_start = (ctx->inputs[0]->h * jobnr) / nb_jobs;
+    int slice_end   = (ctx->inputs[0]->h * (jobnr+1)) / nb_jobs;
+    
+    // Without a plane pointer or a results array there is nothing to do; no count is written
+    if (!td || !td->data || !td->counts) return 0;
+
+    const uint8_t *p = td->data + slice_start * td->linesize; // first row of this job's band
+    unsigned int local_nblack = 0; // tally kept local so the inner loop touches no shared memory
+    int x, y;
+
+    for (y = slice_start; y < slice_end; y++) {
+        for (x = 0; x < td->width; x++)
+            local_nblack += p[x] < td->bthresh; // comparison yields 1 for a below-threshold sample
+        p += td->linesize; // advance one row
+    }
+
+    // Store the tally in this job's own slot; the caller adds the slots together afterwards
+    td->counts[jobnr] = local_nblack;
+    return 0; // this job never reports an error
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     AVFilterContext *ctx = inlink->dst;
     BlackFrameContext *s = ctx->priv;
-    int x, i;
     int pblack = 0;
-    uint8_t *p = frame->data[0];
     AVDictionary **metadata;
     char buf[32];
-
-    for (i = 0; i < frame->height; i++) {
-        for (x = 0; x < inlink->w; x++)
-            s->nblack += p[x] < s->bthresh;
-        p += frame->linesize[0];
+    ThreadData td;
+    
+    // 1. Get thread count
+    int nb_threads = ff_filter_get_nb_threads(ctx);
+    
+    // 2. Allocate one result slot per thread
+    unsigned int *thread_counts = av_calloc(nb_threads, sizeof(*thread_counts));
+    if (!thread_counts) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate thread_counts\n");
+        return AVERROR(ENOMEM);
     }
 
+    // 3. Prepare the data package
+    td.data = frame->data[0];
+    td.linesize = frame->linesize[0];
+    td.width = inlink->w;
+    td.bthresh = s->bthresh;
+    td.counts = thread_counts;
+
+    // 4. Run the threads!
+    // We calculate the exact number of jobs we are about to run
+    int nb_jobs = FFMIN(frame->height, nb_threads);
+    
+    ff_filter_execute(ctx, blackframe_slice, &td, NULL, nb_jobs);
+
+    // 5. THE REDUCE STEP: Sum up the results
+    s->nblack = 0;
+    // Only sum up the jobs that actually ran
+    for (int i = 0; i < nb_jobs; i++) {
+        s->nblack += thread_counts[i];
+    }
+    
+    // --- FROM HERE DOWN, THE CODE IS THE SAME AS THE ORIGINAL ---
+    
     if (frame->flags & AV_FRAME_FLAG_KEY)
         s->last_keyframe = s->frame;
 
@@ -89,6 +148,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
     s->frame++;
     s->nblack = 0;
+    
+    av_free(thread_counts);
+
     return ff_filter_frame(inlink->dst->outputs[0], frame);
 }
 
@@ -118,9 +180,9 @@ const FFFilter ff_vf_blackframe = {
     .p.name        = "blackframe",
     .p.description = NULL_IF_CONFIG_SMALL("Detect frames that are (almost) black."),
     .p.priv_class  = &blackframe_class,
-    .p.flags       = AVFILTER_FLAG_METADATA_ONLY,
+    .p.flags       = AVFILTER_FLAG_METADATA_ONLY | AVFILTER_FLAG_SLICE_THREADS,
     .priv_size     = sizeof(BlackFrameContext),
     FILTER_INPUTS(avfilter_vf_blackframe_inputs),
     FILTER_OUTPUTS(ff_video_default_filterpad),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
-};
+};
\ No newline at end of file
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to