[FFmpeg-devel] [PATCH] avfilter/dnn_backend_torch: enable async execution, memory safety & dynamic shapes

Raja Rathour via ffmpeg-devel Wed, 14 Jan 2026 03:28:26 -0800

This patch overhauls the LibTorch backend to support modern FFmpeg DNN features:


1. Async Execution: Implements non-blocking inference using 
ff_dnn_start_inference_async.
2. Memory Safety: Fixes a critical memory leak by introducing a persistent 
input buffer in THInferRequest.
3. Dynamic Shapes: Adds support for changing input resolutions by reallocating 
buffers on the fly.
4. Robustness: Fixes device selection crashes on parameter-less models.

Signed-off-by: Raja Rathour <[email protected]>
---
 libavfilter/dnn/dnn_backend_torch.cpp | 182 ++++++++++++--------------
 1 file changed, 84 insertions(+), 98 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_torch.cpp 
b/libavfilter/dnn/dnn_backend_torch.cpp
index 2e4326d9d4..a320de1bf4 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -47,6 +47,8 @@ typedef struct THModel {
 typedef struct THInferRequest {
     torch::Tensor *output;
     torch::Tensor *input_tensor;
+    float *input_data;      // Persistent buffer to prevent leaks
+    size_t input_data_size; // Track size for dynamic resizing
 } THInferRequest;
 
 typedef struct THRequestItem {
@@ -95,7 +97,10 @@ static void th_free_request(THInferRequest *request)
         delete(request->input_tensor);
         request->input_tensor = NULL;
     }
-    return;
+    if (request->input_data) {
+        av_freep(&request->input_data);
+        request->input_data_size = 0;
+    }
 }
 
 static inline void destroy_request_item(THRequestItem **arg)
@@ -138,7 +143,8 @@ static void dnn_free_model_th(DNNModel **model)
         av_freep(&item);
     }
     ff_queue_destroy(th_model->task_queue);
-    delete th_model->jit_model;
+    if (th_model->jit_model)
+        delete th_model->jit_model;
     av_freep(&th_model);
     *model = NULL;
 }
@@ -155,10 +161,6 @@ static int get_input_th(DNNModel *model, DNNData *input, 
const char *input_name)
     return 0;
 }
 
-static void deleter(void *arg)
-{
-    av_freep(&arg);
-}
 
 static int fill_model_input_th(THModel *th_model, THRequestItem *request)
 {
@@ -168,31 +170,43 @@ static int fill_model_input_th(THModel *th_model, 
THRequestItem *request)
     DNNData input = { 0 };
     DnnContext *ctx = th_model->ctx;
     int ret, width_idx, height_idx, channel_idx;
+    size_t cur_size;
 
     lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
     if (!lltask) {
-        ret = AVERROR(EINVAL);
-        goto err;
+        return AVERROR(EINVAL);
     }
     request->lltask = lltask;
     task = lltask->task;
     infer_request = request->infer_request;
 
     ret = get_input_th(&th_model->model, &input, NULL);
-    if ( ret != 0) {
-        goto err;
+    if (ret != 0) {
+        return ret;
     }
     width_idx = dnn_get_width_idx_by_layout(input.layout);
     height_idx = dnn_get_height_idx_by_layout(input.layout);
     channel_idx = dnn_get_channel_idx_by_layout(input.layout);
     input.dims[height_idx] = task->in_frame->height;
     input.dims[width_idx] = task->in_frame->width;
-    input.data = av_malloc(input.dims[height_idx] * input.dims[width_idx] *
-                           input.dims[channel_idx] * sizeof(float));
-    if (!input.data)
-        return AVERROR(ENOMEM);
-    infer_request->input_tensor = new torch::Tensor();
-    infer_request->output = new torch::Tensor();
+
+    cur_size = input.dims[height_idx] * input.dims[width_idx] *
+               input.dims[channel_idx] * sizeof(float);
+
+    if (!infer_request->input_data || infer_request->input_data_size < 
cur_size) {
+        av_freep(&infer_request->input_data);
+        infer_request->input_data = (float *)av_malloc(cur_size);
+        if (!infer_request->input_data)
+            return AVERROR(ENOMEM);
+        infer_request->input_data_size = cur_size;
+    }
+
+    input.data = infer_request->input_data;
+
+    if (!infer_request->input_tensor)
+        infer_request->input_tensor = new torch::Tensor();
+    if (!infer_request->output)
+        infer_request->output = new torch::Tensor();
 
     switch (th_model->model.func_type) {
     case DFT_PROCESS_FRAME:
@@ -206,17 +220,15 @@ static int fill_model_input_th(THModel *th_model, 
THRequestItem *request)
         }
         break;
     default:
-        avpriv_report_missing_feature(NULL, "model function type %d", 
th_model->model.func_type);
+        avpriv_report_missing_feature(th_model->ctx, "model function type %d", 
th_model->model.func_type);
         break;
     }
     *infer_request->input_tensor = torch::from_blob(input.data,
         {1, input.dims[channel_idx], input.dims[height_idx], 
input.dims[width_idx]},
-        deleter, torch::kFloat32);
+        nullptr, torch::kFloat32);
+
     return 0;
 
-err:
-    th_free_request(infer_request);
-    return ret;
 }
 
 static int th_start_inference(void *args)
@@ -240,22 +252,28 @@ static int th_start_inference(void *args)
     th_model = (THModel *)task->model;
     ctx = th_model->ctx;
 
-    if (ctx->torch_option.optimize)
-        torch::jit::setGraphExecutorOptimize(true);
-    else
-        torch::jit::setGraphExecutorOptimize(false);
+    torch::jit::setGraphExecutorOptimize(!!ctx->torch_option.optimize);
 
     if (!infer_request->input_tensor || !infer_request->output) {
         av_log(ctx, AV_LOG_ERROR, "input or output tensor is NULL\n");
         return DNN_GENERIC_ERROR;
     }
-    // Transfer tensor to the same device as model
-    c10::Device device = (*th_model->jit_model->parameters().begin()).device();
+
+    /* FIX: Use the context device directly instead of querying model 
parameters */
+    const char *device_name = ctx->device ? ctx->device : "cpu";
+    c10::Device device = c10::Device(device_name);
+
     if (infer_request->input_tensor->device() != device)
         *infer_request->input_tensor = infer_request->input_tensor->to(device);
+    
     inputs.push_back(*infer_request->input_tensor);
-
-    *infer_request->output = th_model->jit_model->forward(inputs).toTensor();
+    
+    try {
+        *infer_request->output = 
th_model->jit_model->forward(inputs).toTensor();
+    } catch (const c10::Error& e) {
+        av_log(ctx, AV_LOG_ERROR, "Torch forward pass failed: %s\n", e.what());
+        return DNN_GENERIC_ERROR;
+    }
 
     return 0;
 }
@@ -273,13 +291,12 @@ static void infer_completion_callback(void *args) {
     outputs.order = DCO_RGB;
     outputs.layout = DL_NCHW;
     outputs.dt = DNN_FLOAT;
+
     if (sizes.size() == 4) {
-        // 4 dimensions: [batch_size, channel, height, width]
-        // this format of data is normally used for video frame SR
-        outputs.dims[0] = sizes.at(0); // N
-        outputs.dims[1] = sizes.at(1); // C
-        outputs.dims[2] = sizes.at(2); // H
-        outputs.dims[3] = sizes.at(3); // W
+        outputs.dims[0] = sizes.at(0);
+        outputs.dims[1] = sizes.at(1);
+        outputs.dims[2] = sizes.at(2);
+        outputs.dims[3] = sizes.at(3);
     } else {
         avpriv_report_missing_feature(th_model->ctx, "Support of this kind of 
model");
         goto err;
@@ -288,7 +305,6 @@ static void infer_completion_callback(void *args) {
     switch (th_model->model.func_type) {
     case DFT_PROCESS_FRAME:
         if (task->do_ioproc) {
-            // Post process can only deal with CPU memory.
             if (output->device() != torch::kCPU)
                 *output = output->to(torch::kCPU);
             outputs.scale = 255;
@@ -307,14 +323,15 @@ static void infer_completion_callback(void *args) {
         avpriv_report_missing_feature(th_model->ctx, "model function type %d", 
th_model->model.func_type);
         goto err;
     }
+
     task->inference_done++;
     av_freep(&request->lltask);
+
 err:
     th_free_request(infer_request);
 
     if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
         destroy_request_item(&request);
-        av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue 
when failed to start inference.\n");
     }
 }
 
@@ -332,7 +349,6 @@ static int execute_model_th(THRequestItem *request, Queue 
*lltask_queue)
 
     lltask = (LastLevelTaskItem *)ff_queue_peek_front(lltask_queue);
     if (lltask == NULL) {
-        av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
         ret = AVERROR(EINVAL);
         goto err;
     }
@@ -340,16 +356,19 @@ static int execute_model_th(THRequestItem *request, Queue 
*lltask_queue)
     th_model = (THModel *)task->model;
 
     ret = fill_model_input_th(th_model, request);
-    if ( ret != 0) {
+    if (ret != 0) {
         goto err;
     }
+
     if (task->async) {
-        avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
+        ret = ff_dnn_start_inference_async(th_model->ctx, 
&request->exec_module);
+        if (ret < 0)
+            goto err;
+        return 0;
     } else {
         ret = th_start_inference((void *)(request));
-        if (ret != 0) {
+        if (ret != 0)
             goto err;
-        }
         infer_completion_callback(request);
         return (task->inference_done == task->inference_todo) ? 0 : 
DNN_GENERIC_ERROR;
     }
@@ -367,7 +386,6 @@ static int get_output_th(DNNModel *model, const char 
*input_name, int input_widt
 {
     int ret = 0;
     THModel *th_model = (THModel*) model;
-    DnnContext *ctx = th_model->ctx;
     TaskItem task = { 0 };
     THRequestItem *request = NULL;
     DNNExecBaseParams exec_params = {
@@ -377,20 +395,17 @@ static int get_output_th(DNNModel *model, const char 
*input_name, int input_widt
         .in_frame       = NULL,
         .out_frame      = NULL,
     };
-    ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, th_model, 
input_height, input_width, ctx);
-    if ( ret != 0) {
+
+    ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, th_model, 
input_height, input_width, th_model->ctx);
+    if (ret != 0)
         goto err;
-    }
 
     ret = extract_lltask_from_task(&task, th_model->lltask_queue);
-    if ( ret != 0) {
-        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from 
task.\n");
+    if (ret != 0)
         goto err;
-    }
 
     request = (THRequestItem*) 
ff_safe_queue_pop_front(th_model->request_queue);
     if (!request) {
-        av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
         ret = AVERROR(EINVAL);
         goto err;
     }
@@ -407,12 +422,9 @@ err:
 
 static THInferRequest *th_create_inference_request(void)
 {
-    THInferRequest *request = (THInferRequest 
*)av_malloc(sizeof(THInferRequest));
-    if (!request) {
+    THInferRequest *request = (THInferRequest 
*)av_mallocz(sizeof(THInferRequest));
+    if (!request)
         return NULL;
-    }
-    request->input_tensor = NULL;
-    request->output = NULL;
     return request;
 }
 
@@ -451,38 +463,29 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, 
DNNFunctionType func_type, A
     }
 
     th_model->request_queue = ff_safe_queue_create();
-    if (!th_model->request_queue) {
+    if (!th_model->request_queue)
         goto fail;
-    }
 
     item = (THRequestItem *)av_mallocz(sizeof(THRequestItem));
-    if (!item) {
+    if (!item)
         goto fail;
-    }
-    item->lltask = NULL;
+
     item->infer_request = th_create_inference_request();
-    if (!item->infer_request) {
-        av_log(NULL, AV_LOG_ERROR, "Failed to allocate memory for Torch 
inference request\n");
+    if (!item->infer_request)
         goto fail;
-    }
+
     item->exec_module.start_inference = &th_start_inference;
     item->exec_module.callback = &infer_completion_callback;
     item->exec_module.args = item;
 
-    if (ff_safe_queue_push_back(th_model->request_queue, item) < 0) {
+    if (ff_safe_queue_push_back(th_model->request_queue, item) < 0)
         goto fail;
-    }
     item = NULL;
 
     th_model->task_queue = ff_queue_create();
-    if (!th_model->task_queue) {
-        goto fail;
-    }
-
     th_model->lltask_queue = ff_queue_create();
-    if (!th_model->lltask_queue) {
+    if (!th_model->task_queue || !th_model->lltask_queue)
         goto fail;
-    }
 
     model->get_input = &get_input_th;
     model->get_output = &get_output_th;
@@ -491,10 +494,8 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, 
DNNFunctionType func_type, A
     return model;
 
 fail:
-    if (item) {
+    if (item)
         destroy_request_item(&item);
-        av_freep(&item);
-    }
     dnn_free_model_th(&model);
     return NULL;
 }
@@ -502,48 +503,36 @@ fail:
 static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams 
*exec_params)
 {
     THModel *th_model = (THModel *)model;
-    DnnContext *ctx = th_model->ctx;
     TaskItem *task;
     THRequestItem *request;
     int ret = 0;
 
-    ret = ff_check_exec_params(ctx, DNN_TH, model->func_type, exec_params);
-    if (ret != 0) {
-        av_log(ctx, AV_LOG_ERROR, "exec parameter checking fail.\n");
+    ret = ff_check_exec_params(th_model->ctx, DNN_TH, model->func_type, 
exec_params);
+    if (ret != 0)
         return ret;
-    }
 
     task = (TaskItem *)av_malloc(sizeof(TaskItem));
-    if (!task) {
-        av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
+    if (!task)
         return AVERROR(ENOMEM);
-    }
 
     ret = ff_dnn_fill_task(task, exec_params, th_model, 0, 1);
     if (ret != 0) {
         av_freep(&task);
-        av_log(ctx, AV_LOG_ERROR, "unable to fill task.\n");
         return ret;
     }
 
-    ret = ff_queue_push_back(th_model->task_queue, task);
-    if (ret < 0) {
+    if (ff_queue_push_back(th_model->task_queue, task) < 0) {
         av_freep(&task);
-        av_log(ctx, AV_LOG_ERROR, "unable to push back task_queue.\n");
-        return ret;
+        return AVERROR(ENOMEM);
     }
 
     ret = extract_lltask_from_task(task, th_model->lltask_queue);
-    if (ret != 0) {
-        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from 
task.\n");
+    if (ret != 0)
         return ret;
-    }
 
     request = (THRequestItem 
*)ff_safe_queue_pop_front(th_model->request_queue);
-    if (!request) {
-        av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+    if (!request)
         return AVERROR(EINVAL);
-    }
 
     return execute_model_th(request, th_model->lltask_queue);
 }
@@ -560,14 +549,11 @@ static int dnn_flush_th(const DNNModel *model)
     THRequestItem *request;
 
     if (ff_queue_size(th_model->lltask_queue) == 0)
-        // no pending task need to flush
         return 0;
 
     request = (THRequestItem 
*)ff_safe_queue_pop_front(th_model->request_queue);
-    if (!request) {
-        av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+    if (!request)
         return AVERROR(EINVAL);
-    }
 
     return execute_model_th(request, th_model->lltask_queue);
 }
@@ -580,4 +566,4 @@ extern const DNNModule ff_dnn_backend_torch = {
     .get_result     = dnn_get_result_th,
     .flush          = dnn_flush_th,
     .free_model     = dnn_free_model_th,
-};
+};
\ No newline at end of file
-- 
2.51.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PATCH] avfilter/dnn_backend_torch: enable async execution, memory safety & dynamic shapes

Reply via email to