PR #21532 opened by Steven Xiao (younengxiao)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21532
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21532.patch

This patch adds ONNX Runtime as a new DNN backend for FFmpeg's dnn_processing filter, enabling hardware-accelerated neural network inference on multiple GPU platforms.

Features:
- CPU execution provider (default)
- CUDA execution provider (NVIDIA GPUs)
- DirectML execution provider (AMD/Intel/NVIDIA GPUs on Windows)
- Configurable GPU device selection via the gpu_device_id option
- Thread count configuration via the num_threads option

New filter options for dnn_processing with dnn_backend=onnx:
- execution_provider: cpu, cuda, or dml (default: cpu)
- gpu_device_id: GPU device index (default: 0)
- num_threads: inference thread count (default: 0, auto)

Example usage:

**CPU inference**
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx" output.mp4

**CUDA GPU inference**
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx:execution_provider=cuda:gpu_device_id=0" output.mp4

**DirectML GPU inference (Windows)**
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx:execution_provider=dml:gpu_device_id=0" output.mp4
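Whether the cuda or dml values can actually take effect depends on which execution providers the installed ONNX Runtime build ships; the backend in this patch logs a warning and falls back to CPU when a provider cannot be enabled. A quick way to check the runtime itself, independent of FFmpeg, is to query it through the same C API the backend uses. The following standalone program is only a sketch and not part of the patch (the file name check_providers.c is made up); it assumes onnxruntime_c_api.h and libonnxruntime are reachable, as set up in the build instructions below.

/* check_providers.c - standalone sketch, not part of this patch. */
#include <stdio.h>
#include <onnxruntime_c_api.h>

int main(void)
{
    const OrtApi *ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
    char **providers = NULL;
    int count = 0;
    OrtStatus *status;

    if (!ort) {
        fprintf(stderr, "installed ONNX Runtime does not support C API version %d\n", ORT_API_VERSION);
        return 1;
    }

    /* Lists every execution provider compiled into this libonnxruntime,
     * e.g. CPUExecutionProvider, CUDAExecutionProvider, DmlExecutionProvider. */
    status = ort->GetAvailableProviders(&providers, &count);
    if (status) {
        fprintf(stderr, "GetAvailableProviders failed: %s\n", ort->GetErrorMessage(status));
        ort->ReleaseStatus(status);
        return 1;
    }

    for (int i = 0; i < count; i++)
        printf("%s\n", providers[i]);

    status = ort->ReleaseAvailableProviders(providers, count);
    if (status)
        ort->ReleaseStatus(status);
    return 0;
}

If CUDAExecutionProvider or DmlExecutionProvider is missing from the output, the corresponding execution_provider setting will end up on the CPU path after a warning, so this is worth checking before benchmarking GPU inference.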
Build Instructions (MSYS2 MinGW):

1. Install MSYS2 from https://www.msys2.org/

2. Install the build tools in an MSYS2 MinGW 64-bit terminal:
   pacman -S mingw-w64-x86_64-gcc mingw-w64-x86_64-nasm make diffutils pkg-config

3. Download ONNX Runtime (GPU version):
   https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-win-x64-gpu-1.23.2.zip

4. Extract to C:\onnxruntime with the following structure:
   C:\onnxruntime\include\onnxruntime_c_api.h
   C:\onnxruntime\lib\onnxruntime.dll
   C:\onnxruntime\lib\onnxruntime.lib

5. Configure and build:
   ./configure \
       --enable-gpl \
       --enable-libonnxruntime \
       --extra-cflags="-I/c/onnxruntime/include -D_stdcall=__stdcall" \
       --extra-ldflags="-L/c/onnxruntime/lib" \
       --extra-libs="-lonnxruntime"
   make -j$(nproc)

Note: the -D_stdcall=__stdcall flag is required because the ONNX Runtime headers use the MSVC-specific _stdcall keyword, which MinGW GCC does not define; mapping it to GCC's __stdcall keeps the headers compiling.
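Before building all of FFmpeg it can be worth confirming that the include path, the library path and the _stdcall workaround are picked up correctly; configure's own check only probes for OrtGetApiBase. The snippet below is a standalone smoke test, not part of the patch (the file name ort_smoke.c is made up), compiled with the same flags documented above.

/* ort_smoke.c - standalone smoke test, not part of this patch.
 * Build with the same flags passed to configure above:
 *   gcc ort_smoke.c -I/c/onnxruntime/include -D_stdcall=__stdcall \
 *       -L/c/onnxruntime/lib -lonnxruntime -o ort_smoke
 */
#include <stdio.h>
#include <onnxruntime_c_api.h>

int main(void)
{
    /* If this compiles, links and runs, the header path, the _stdcall
     * define and -lonnxruntime match what the FFmpeg build will need. */
    const OrtApiBase *base = OrtGetApiBase();
    printf("ONNX Runtime %s (C API version %d)\n",
           base->GetVersionString(), ORT_API_VERSION);
    return 0;
}

When running the test (and later ffmpeg.exe), onnxruntime.dll from the extracted package, plus the CUDA provider DLLs if cuda is used, typically needs to be on PATH or next to the executable.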
External dependency:
- onnxruntime_c_api.h (header)
- libonnxruntime (library)

From 26780145b167a25b51f256ad752775ea81b4a89c Mon Sep 17 00:00:00 2001
From: stevxiao <[email protected]>
Date: Tue, 20 Jan 2026 15:31:40 -0500
Subject: [PATCH] avfilter/dnn: add ONNX Runtime backend with GPU execution provider support

This patch adds ONNX Runtime as a new DNN backend for FFmpeg's dnn_processing filter, enabling hardware-accelerated neural network inference on multiple GPU platforms.

Features:
- CPU execution provider (default)
- CUDA execution provider (NVIDIA GPUs)
- DirectML execution provider (AMD/Intel/NVIDIA GPUs on Windows)
- Configurable GPU device selection via the gpu_device_id option
- Thread count configuration via the num_threads option

New filter options for dnn_processing with dnn_backend=onnx:
- execution_provider: cpu, cuda, or dml (default: cpu)
- gpu_device_id: GPU device index (default: 0)
- num_threads: inference thread count (default: 0, auto)

Example usage:

# CPU inference
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx" output.mp4

# CUDA GPU inference
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx:execution_provider=cuda:gpu_device_id=0" output.mp4

# DirectML GPU inference (Windows)
ffmpeg -i input.mp4 -vf "dnn_processing=dnn_backend=onnx:model=model.onnx:execution_provider=dml:gpu_device_id=0" output.mp4

Build Instructions (MSYS2 MinGW):

1. Install MSYS2 from https://www.msys2.org/

2. Install the build tools in an MSYS2 MinGW 64-bit terminal:
   pacman -S mingw-w64-x86_64-gcc mingw-w64-x86_64-nasm make diffutils pkg-config

3. Download ONNX Runtime (GPU version):
   https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-win-x64-gpu-1.23.2.zip

4. Extract to C:\onnxruntime with the following structure:
   C:\onnxruntime\include\onnxruntime_c_api.h
   C:\onnxruntime\lib\onnxruntime.dll
   C:\onnxruntime\lib\onnxruntime.lib

5. Configure and build:
   ./configure \
       --enable-gpl \
       --enable-libonnxruntime \
       --extra-cflags="-I/c/onnxruntime/include -D_stdcall=__stdcall" \
       --extra-ldflags="-L/c/onnxruntime/lib" \
       --extra-libs="-lonnxruntime"
   make -j$(nproc)

Note: the -D_stdcall=__stdcall flag is required because the ONNX Runtime headers use the MSVC-specific _stdcall keyword, which MinGW GCC does not define; mapping it to GCC's __stdcall keeps the headers compiling.

External dependency:
- onnxruntime_c_api.h (header)
- libonnxruntime (library)
---
 configure                          |   5 +-
 libavfilter/dnn/Makefile           |   1 +
 libavfilter/dnn/dnn_backend_onnx.c | 851 +++++++++++++++++++++++++++++
 libavfilter/dnn/dnn_interface.c    |   4 +
 libavfilter/dnn_interface.h        |  13 +-
 libavfilter/vf_dnn_processing.c    |   5 +-
 6 files changed, 876 insertions(+), 3 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_onnx.c

diff --git a/configure b/configure
index 01edfacacc..53b53780be 100755
--- a/configure
+++ b/configure
@@ -289,6 +289,7 @@ External library support:
   --enable-libtls          enable LibreSSL (via libtls), needed for https support
                            if openssl, gnutls or mbedtls is not used [no]
   --enable-libtorch        enable Torch as one DNN backend [no]
+  --enable-libonnxruntime  enable ONNX Runtime as a DNN module backend [no]
   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2         enable libv4l2/v4l-utils [no]
@@ -2058,6 +2059,7 @@ EXTERNAL_LIBRARY_LIST="
     libtheora
     libtls
     libtorch
+    libonnxruntime
     libtwolame
     libuavs3d
     libv4l2
@@ -2978,7 +2980,7 @@ dirac_parse_select="golomb"
 dovi_rpudec_select="golomb"
 dovi_rpuenc_select="golomb"
 dnn_deps="avformat swscale"
-dnn_deps_any="libtensorflow libopenvino libtorch"
+dnn_deps_any="libtensorflow libopenvino libtorch libonnxruntime"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
 faandct_deps="faan"
@@ -7321,6 +7323,7 @@ enabled libtheora && require libtheora theora/theoraenc.h th_info_init -
 enabled libtls            && require_pkg_config libtls libtls tls.h tls_configure &&
                              { enabled gpl && ! enabled nonfree && die "ERROR: LibreSSL is incompatible with the gpl"; }
 enabled libtorch          && check_cxxflags -std=c++17 && require_cxx libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
+enabled libonnxruntime    && require libonnxruntime onnxruntime_c_api.h OrtGetApiBase -lonnxruntime
 enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
                              { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
                                die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 3d09927c98..7c5d7d8ab6 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -7,5 +7,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o
 DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o
+DNN-OBJS-$(CONFIG_LIBONNXRUNTIME) += dnn/dnn_backend_onnx.o
 
 OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_onnx.c b/libavfilter/dnn/dnn_backend_onnx.c
new file mode 100644
index 0000000000..ce3c656fbc
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_onnx.c
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN ONNX Runtime backend implementation.
+ */ + +#include "libavutil/opt.h" +#include "libavutil/avassert.h" +#include "libavutil/mem.h" +#include "libavutil/avstring.h" +#include "../filters.h" +#include "dnn_io_proc.h" +#include "dnn_backend_common.h" +#include "queue.h" +#include "safe_queue.h" +#include <onnxruntime_c_api.h> +#include <string.h> + +typedef struct ONNXModel { + DNNModel model; + DnnContext *ctx; + OrtEnv *env; + OrtSession *session; + OrtSessionOptions *session_options; + OrtAllocator *allocator; + SafeQueue *request_queue; + Queue *task_queue; + Queue *lltask_queue; +} ONNXModel; + +typedef struct ONNXInferRequest { + OrtValue *input_tensor; + OrtValue *output_tensor; +} ONNXInferRequest; + +typedef struct ONNXRequestItem { + ONNXInferRequest *infer_request; + LastLevelTaskItem *lltask; + DNNAsyncExecModule exec_module; +} ONNXRequestItem; + +#define OFFSET(x) offsetof(ONNXOptions, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM +static const AVOption dnn_onnx_options[] = { + { "num_threads", "number of threads for ONNX inference", OFFSET(num_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, + { "execution_provider", "execution provider for ONNX inference (cpu, cuda, dml)", OFFSET(execution_provider), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS }, + { "gpu_device_id", "GPU device ID for ONNX inference", OFFSET(gpu_device_id), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(dnn_onnx); + +static const OrtApi *g_ort = NULL; + +#define ORT_ABORT_ON_ERROR(expr) \ + do { \ + OrtStatus *status = (expr); \ + if (status != NULL) { \ + const char *msg = g_ort->GetErrorMessage(status); \ + av_log(ctx, AV_LOG_ERROR, "ONNX Runtime error: %s\n", msg); \ + g_ort->ReleaseStatus(status); \ + goto err; \ + } \ + } while (0) + +static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue) +{ + ONNXModel *onnx_model = (ONNXModel *)task->model; + DnnContext *ctx = onnx_model->ctx; + LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask)); + + if (!lltask) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n"); + return AVERROR(ENOMEM); + } + task->inference_todo = 1; + task->inference_done = 0; + lltask->task = task; + if (ff_queue_push_back(lltask_queue, lltask) < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to push back lltask_queue.\n"); + av_freep(&lltask); + return AVERROR(ENOMEM); + } + return 0; +} + +static void onnx_free_request(ONNXInferRequest *request) +{ + if (!request) + return; + if (request->input_tensor) { + g_ort->ReleaseValue(request->input_tensor); + request->input_tensor = NULL; + } + if (request->output_tensor) { + g_ort->ReleaseValue(request->output_tensor); + request->output_tensor = NULL; + } +} + +static inline void destroy_request_item(ONNXRequestItem **arg) +{ + ONNXRequestItem *item; + if (!arg || !*arg) + return; + item = *arg; + onnx_free_request(item->infer_request); + av_freep(&item->infer_request); + av_freep(&item->lltask); + ff_dnn_async_module_cleanup(&item->exec_module); + av_freep(arg); +} + +static void dnn_free_model_onnx(DNNModel **model) +{ + ONNXModel *onnx_model; + if (!model || !*model) + return; + + onnx_model = (ONNXModel *)(*model); + + while (ff_safe_queue_size(onnx_model->request_queue) != 0) { + ONNXRequestItem *item = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue); + destroy_request_item(&item); + } + ff_safe_queue_destroy(onnx_model->request_queue); + + while (ff_queue_size(onnx_model->lltask_queue) != 0) { + LastLevelTaskItem *item = (LastLevelTaskItem 
*)ff_queue_pop_front(onnx_model->lltask_queue); + av_freep(&item); + } + ff_queue_destroy(onnx_model->lltask_queue); + + while (ff_queue_size(onnx_model->task_queue) != 0) { + TaskItem *item = (TaskItem *)ff_queue_pop_front(onnx_model->task_queue); + av_frame_free(&item->in_frame); + av_frame_free(&item->out_frame); + av_freep(&item); + } + ff_queue_destroy(onnx_model->task_queue); + + if (onnx_model->session) + g_ort->ReleaseSession(onnx_model->session); + if (onnx_model->session_options) + g_ort->ReleaseSessionOptions(onnx_model->session_options); + if (onnx_model->env) + g_ort->ReleaseEnv(onnx_model->env); + + av_freep(&onnx_model); + *model = NULL; +} + +static int get_input_onnx(DNNModel *model, DNNData *input, const char *input_name) +{ + ONNXModel *onnx_model = (ONNXModel *)model; + DnnContext *ctx = onnx_model->ctx; + OrtTypeInfo *type_info = NULL; + const OrtTensorTypeAndShapeInfo *tensor_info = NULL; + size_t num_dims; + int64_t *dims; + ONNXTensorElementDataType tensor_type; + OrtStatus *status; + + status = g_ort->SessionGetInputTypeInfo(onnx_model->session, 0, &type_info); + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_ERROR, "Failed to get input type info: %s\n", msg); + g_ort->ReleaseStatus(status); + return AVERROR(EINVAL); + } + + status = g_ort->CastTypeInfoToTensorInfo(type_info, &tensor_info); + if (status != NULL) { + g_ort->ReleaseTypeInfo(type_info); + g_ort->ReleaseStatus(status); + return AVERROR(EINVAL); + } + + status = g_ort->GetDimensionsCount(tensor_info, &num_dims); + if (status != NULL) { + g_ort->ReleaseTypeInfo(type_info); + g_ort->ReleaseStatus(status); + return AVERROR(EINVAL); + } + + dims = av_malloc(num_dims * sizeof(int64_t)); + if (!dims) { + g_ort->ReleaseTypeInfo(type_info); + return AVERROR(ENOMEM); + } + + g_ort->GetDimensions(tensor_info, dims, num_dims); + g_ort->GetTensorElementType(tensor_info, &tensor_type); + + // Assume NCHW layout for now + input->layout = DL_NCHW; + input->dims[0] = dims[0] > 0 ? dims[0] : 1; + input->dims[1] = dims[1] > 0 ? dims[1] : 3; + input->dims[2] = dims[2] > 0 ? dims[2] : -1; + input->dims[3] = dims[3] > 0 ? 
dims[3] : -1; + + if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { + input->dt = DNN_FLOAT; + } else if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) { + input->dt = DNN_UINT8; + } else { + av_log(ctx, AV_LOG_ERROR, "Unsupported tensor data type\n"); + av_free(dims); + g_ort->ReleaseTypeInfo(type_info); + return AVERROR(ENOSYS); + } + + input->order = DCO_RGB; + av_free(dims); + g_ort->ReleaseTypeInfo(type_info); + return 0; +} + +static int fill_model_input_onnx(ONNXModel *onnx_model, ONNXRequestItem *request) +{ + LastLevelTaskItem *lltask = NULL; + TaskItem *task = NULL; + ONNXInferRequest *infer_request = NULL; + DNNData input = { 0 }; + DnnContext *ctx = onnx_model->ctx; + int ret, width_idx, height_idx, channel_idx; + int64_t input_shape[4]; + size_t input_tensor_size; + OrtMemoryInfo *memory_info; + OrtStatus *status; + + lltask = (LastLevelTaskItem *)ff_queue_pop_front(onnx_model->lltask_queue); + if (!lltask) { + ret = AVERROR(EINVAL); + goto err; + } + request->lltask = lltask; + task = lltask->task; + infer_request = request->infer_request; + + ret = get_input_onnx(&onnx_model->model, &input, NULL); + if (ret != 0) { + goto err; + } + + width_idx = dnn_get_width_idx_by_layout(input.layout); + height_idx = dnn_get_height_idx_by_layout(input.layout); + channel_idx = dnn_get_channel_idx_by_layout(input.layout); + + input.dims[height_idx] = task->in_frame->height; + input.dims[width_idx] = task->in_frame->width; + + input_shape[0] = input.dims[0]; + input_shape[1] = input.dims[channel_idx]; + input_shape[2] = input.dims[height_idx]; + input_shape[3] = input.dims[width_idx]; + + input_tensor_size = input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + + if (input.dt == DNN_FLOAT) { + input_tensor_size *= sizeof(float); + } else { + input_tensor_size *= sizeof(uint8_t); + } + + input.data = av_malloc(input_tensor_size); + if (!input.data) { + ret = AVERROR(ENOMEM); + goto err; + } + + switch (onnx_model->model.func_type) { + case DFT_PROCESS_FRAME: + if (input.dt == DNN_FLOAT) { + input.scale = 255; + } + if (task->do_ioproc) { + if (onnx_model->model.frame_pre_proc != NULL) { + onnx_model->model.frame_pre_proc(task->in_frame, &input, onnx_model->model.filter_ctx); + } else { + ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx); + } + } + break; + case DFT_ANALYTICS_DETECT: + ff_frame_to_dnn_detect(task->in_frame, &input, ctx); + break; + default: + avpriv_report_missing_feature(ctx, "model function type %d", onnx_model->model.func_type); + av_freep(&input.data); + ret = AVERROR(ENOSYS); + goto err; + } + + status = g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info); + if (status != NULL) { + av_freep(&input.data); + ret = AVERROR(ENOMEM); + goto err; + } + + if (input.dt == DNN_FLOAT) { + status = g_ort->CreateTensorWithDataAsOrtValue( + memory_info, input.data, input_tensor_size, + input_shape, 4, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, + &infer_request->input_tensor); + } else { + status = g_ort->CreateTensorWithDataAsOrtValue( + memory_info, input.data, input_tensor_size, + input_shape, 4, ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, + &infer_request->input_tensor); + } + + g_ort->ReleaseMemoryInfo(memory_info); + + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_ERROR, "Failed to create input tensor: %s\n", msg); + g_ort->ReleaseStatus(status); + av_freep(&input.data); + ret = AVERROR(ENOMEM); + goto err; + } + + return 0; + +err: + onnx_free_request(infer_request); + return ret; +} 
+ +static int onnx_start_inference(void *args) +{ + ONNXRequestItem *request = (ONNXRequestItem *)args; + ONNXInferRequest *infer_request = NULL; + LastLevelTaskItem *lltask = NULL; + TaskItem *task = NULL; + ONNXModel *onnx_model = NULL; + DnnContext *ctx = NULL; + OrtStatus *status; + const char *input_names[] = {"input"}; + const char *output_names[] = {"output"}; + + if (!request) { + av_log(NULL, AV_LOG_ERROR, "ONNXRequestItem is NULL\n"); + return AVERROR(EINVAL); + } + + infer_request = request->infer_request; + lltask = request->lltask; + task = lltask->task; + onnx_model = (ONNXModel *)task->model; + ctx = onnx_model->ctx; + + if (!infer_request->input_tensor) { + av_log(ctx, AV_LOG_ERROR, "Input tensor is NULL\n"); + return DNN_GENERIC_ERROR; + } + + status = g_ort->Run(onnx_model->session, NULL, + input_names, (const OrtValue *const *)&infer_request->input_tensor, 1, + output_names, 1, &infer_request->output_tensor); + + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_ERROR, "ONNX inference failed: %s\n", msg); + g_ort->ReleaseStatus(status); + return DNN_GENERIC_ERROR; + } + + return 0; +} + +static void infer_completion_callback(void *args) +{ + ONNXRequestItem *request = (ONNXRequestItem *)args; + LastLevelTaskItem *lltask = request->lltask; + TaskItem *task = lltask->task; + DNNData outputs = { 0 }; + ONNXInferRequest *infer_request = request->infer_request; + ONNXModel *onnx_model = (ONNXModel *)task->model; + DnnContext *ctx = onnx_model->ctx; + OrtTensorTypeAndShapeInfo *tensor_info; + size_t num_dims; + int64_t *dims; + void *output_data; + OrtStatus *status; + + if (!infer_request->output_tensor) { + av_log(ctx, AV_LOG_ERROR, "Output tensor is NULL\n"); + goto err; + } + + status = g_ort->GetTensorTypeAndShape(infer_request->output_tensor, &tensor_info); + if (status != NULL) { + av_log(ctx, AV_LOG_ERROR, "Failed to get output tensor info\n"); + goto err; + } + + g_ort->GetDimensionsCount(tensor_info, &num_dims); + dims = av_malloc(num_dims * sizeof(int64_t)); + g_ort->GetDimensions(tensor_info, dims, num_dims); + + outputs.layout = DL_NCHW; + outputs.order = DCO_RGB; + outputs.dt = DNN_FLOAT; + + if (num_dims == 4) { + outputs.dims[0] = dims[0]; + outputs.dims[1] = dims[1]; + outputs.dims[2] = dims[2]; + outputs.dims[3] = dims[3]; + } else { + avpriv_report_missing_feature(ctx, "Support for %zu dimensional output", num_dims); + av_free(dims); + g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info); + goto err; + } + + status = g_ort->GetTensorMutableData(infer_request->output_tensor, &output_data); + if (status != NULL) { + av_free(dims); + g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info); + goto err; + } + + outputs.data = output_data; + + switch (onnx_model->model.func_type) { + case DFT_PROCESS_FRAME: + if (task->do_ioproc) { + outputs.scale = 255; + if (onnx_model->model.frame_post_proc != NULL) { + onnx_model->model.frame_post_proc(task->out_frame, &outputs, onnx_model->model.filter_ctx); + } else { + ff_proc_from_dnn_to_frame(task->out_frame, &outputs, ctx); + } + } else { + task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)]; + task->out_frame->height = outputs.dims[dnn_get_height_idx_by_layout(outputs.layout)]; + } + break; + default: + avpriv_report_missing_feature(ctx, "model function type %d", onnx_model->model.func_type); + av_free(dims); + g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info); + goto err; + } + + av_free(dims); + g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info); + 
task->inference_done++; + +err: + onnx_free_request(infer_request); + if (ff_safe_queue_push_back(onnx_model->request_queue, request) < 0) { + destroy_request_item(&request); + av_log(ctx, AV_LOG_ERROR, "Unable to push back request_queue.\n"); + } +} + +static int execute_model_onnx(ONNXRequestItem *request, Queue *lltask_queue) +{ + ONNXModel *onnx_model = NULL; + LastLevelTaskItem *lltask; + TaskItem *task = NULL; + int ret = 0; + + if (ff_queue_size(lltask_queue) == 0) { + destroy_request_item(&request); + return 0; + } + + lltask = (LastLevelTaskItem *)ff_queue_peek_front(lltask_queue); + if (lltask == NULL) { + av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n"); + ret = AVERROR(EINVAL); + goto err; + } + task = lltask->task; + onnx_model = (ONNXModel *)task->model; + + ret = fill_model_input_onnx(onnx_model, request); + if (ret != 0) { + goto err; + } + + if (task->async) { + avpriv_report_missing_feature(onnx_model->ctx, "ONNX async inference"); + ret = AVERROR(ENOSYS); + goto err; + } else { + ret = onnx_start_inference((void *)request); + if (ret != 0) { + goto err; + } + infer_completion_callback(request); + return (task->inference_done == task->inference_todo) ? 0 : DNN_GENERIC_ERROR; + } + +err: + onnx_free_request(request->infer_request); + if (ff_safe_queue_push_back(onnx_model->request_queue, request) < 0) { + destroy_request_item(&request); + } + return ret; +} + +static int get_output_onnx(DNNModel *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + int ret = 0; + ONNXModel *onnx_model = (ONNXModel *)model; + DnnContext *ctx = onnx_model->ctx; + TaskItem task = { 0 }; + ONNXRequestItem *request = NULL; + DNNExecBaseParams exec_params = { + .input_name = input_name, + .output_names = &output_name, + .nb_output = 1, + .in_frame = NULL, + .out_frame = NULL, + }; + + ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, onnx_model, input_height, input_width, ctx); + if (ret != 0) { + goto err; + } + + ret = extract_lltask_from_task(&task, onnx_model->lltask_queue); + if (ret != 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to extract last level task from task.\n"); + goto err; + } + + request = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue); + if (!request) { + av_log(ctx, AV_LOG_ERROR, "Unable to get infer request.\n"); + ret = AVERROR(EINVAL); + goto err; + } + + ret = execute_model_onnx(request, onnx_model->lltask_queue); + *output_width = task.out_frame->width; + *output_height = task.out_frame->height; + +err: + av_frame_free(&task.out_frame); + av_frame_free(&task.in_frame); + return ret; +} + +static ONNXInferRequest *onnx_create_inference_request(void) +{ + ONNXInferRequest *request = av_malloc(sizeof(ONNXInferRequest)); + if (!request) + return NULL; + request->input_tensor = NULL; + request->output_tensor = NULL; + return request; +} + +static DNNModel *dnn_load_model_onnx(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx) +{ + DNNModel *model = NULL; + ONNXModel *onnx_model = NULL; + ONNXRequestItem *item = NULL; + ONNXOptions *options = &ctx->onnx_option; + OrtStatus *status; + + if (!g_ort) { + g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); + if (!g_ort) { + av_log(ctx, AV_LOG_ERROR, "Failed to get ONNX Runtime API\n"); + return NULL; + } + } + + onnx_model = av_mallocz(sizeof(ONNXModel)); + if (!onnx_model) + return NULL; + + model = &onnx_model->model; + onnx_model->ctx = ctx; + + // Create ONNX Runtime environment + 
status = g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "FFmpeg", &onnx_model->env); + if (status != NULL) { + av_log(ctx, AV_LOG_ERROR, "Failed to create ONNX Runtime environment\n"); + goto fail; + } + + // Create session options + status = g_ort->CreateSessionOptions(&onnx_model->session_options); + if (status != NULL) { + av_log(ctx, AV_LOG_ERROR, "Failed to create session options\n"); + goto fail; + } + + // Set number of threads + if (options->num_threads > 0) { + g_ort->SetIntraOpNumThreads(onnx_model->session_options, options->num_threads); + } + + // Set graph optimization level + g_ort->SetSessionGraphOptimizationLevel(onnx_model->session_options, ORT_ENABLE_ALL); + + // Configure execution provider based on user selection + if (options->execution_provider && av_strcasecmp(options->execution_provider, "cpu") != 0) { + // GPU execution providers + if (av_strcasecmp(options->execution_provider, "cuda") == 0) { + // CUDA execution provider for NVIDIA GPUs + if (g_ort->SessionOptionsAppendExecutionProvider_CUDA) { + OrtCUDAProviderOptions cuda_options; + memset(&cuda_options, 0, sizeof(cuda_options)); + cuda_options.device_id = options->gpu_device_id; + + status = g_ort->SessionOptionsAppendExecutionProvider_CUDA( + onnx_model->session_options, &cuda_options); + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_WARNING, "Failed to enable CUDA (device %d): %s. Falling back to CPU\n", + options->gpu_device_id, msg); + g_ort->ReleaseStatus(status); + } else { + av_log(ctx, AV_LOG_INFO, "Using CUDA execution provider on device %d\n", options->gpu_device_id); + } + } else { + av_log(ctx, AV_LOG_WARNING, "CUDA provider function not available in this ONNX Runtime API version. Falling back to CPU\n"); + } + } else if (av_strcasecmp(options->execution_provider, "dml") == 0) { + // DirectML execution provider for AMD/Intel/NVIDIA GPUs on Windows +#ifdef _WIN32 + // Use generic SessionOptionsAppendExecutionProvider with "DML" provider name + const char* dml_options_keys[] = {"device_id"}; + const char* dml_options_values[] = {NULL}; + char device_id_str[32]; + snprintf(device_id_str, sizeof(device_id_str), "%d", options->gpu_device_id); + dml_options_values[0] = device_id_str; + + if (g_ort->SessionOptionsAppendExecutionProvider) { + status = g_ort->SessionOptionsAppendExecutionProvider( + onnx_model->session_options, "DML", + dml_options_keys, dml_options_values, 1); + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_WARNING, "Failed to enable DirectML (device %d): %s. Falling back to CPU\n", + options->gpu_device_id, msg); + g_ort->ReleaseStatus(status); + } else { + av_log(ctx, AV_LOG_INFO, "Using DirectML execution provider on device %d\n", options->gpu_device_id); + } + } else { + av_log(ctx, AV_LOG_WARNING, "DirectML provider function not available in this ONNX Runtime API version. Falling back to CPU\n"); + } +#else + av_log(ctx, AV_LOG_WARNING, "DirectML is only available on Windows. Falling back to CPU\n"); +#endif + } else { + av_log(ctx, AV_LOG_WARNING, "Unknown execution provider '%s'. Supported: cpu, cuda. 
Using CPU\n", + options->execution_provider); + } + } else { + av_log(ctx, AV_LOG_INFO, "Using CPU execution provider\n"); + } + + // Create session +#ifdef _WIN32 + { + wchar_t *wfilename; + int wlen = MultiByteToWideChar(CP_UTF8, 0, ctx->model_filename, -1, NULL, 0); + wfilename = av_malloc(wlen * sizeof(wchar_t)); + if (!wfilename) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for wide filename\n"); + goto fail; + } + MultiByteToWideChar(CP_UTF8, 0, ctx->model_filename, -1, wfilename, wlen); + status = g_ort->CreateSession(onnx_model->env, wfilename, + onnx_model->session_options, &onnx_model->session); + av_free(wfilename); + } +#else + status = g_ort->CreateSession(onnx_model->env, ctx->model_filename, + onnx_model->session_options, &onnx_model->session); +#endif + if (status != NULL) { + const char *msg = g_ort->GetErrorMessage(status); + av_log(ctx, AV_LOG_ERROR, "Failed to create ONNX session: %s\n", msg); + g_ort->ReleaseStatus(status); + goto fail; + } + + // Get allocator + status = g_ort->GetAllocatorWithDefaultOptions(&onnx_model->allocator); + if (status != NULL) { + av_log(ctx, AV_LOG_ERROR, "Failed to get allocator\n"); + goto fail; + } + + // Create request queue + onnx_model->request_queue = ff_safe_queue_create(); + if (!onnx_model->request_queue) { + goto fail; + } + + // Create and add initial request item + item = av_mallocz(sizeof(ONNXRequestItem)); + if (!item) { + goto fail; + } + item->lltask = NULL; + item->infer_request = onnx_create_inference_request(); + if (!item->infer_request) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for ONNX inference request\n"); + goto fail; + } + item->exec_module.start_inference = &onnx_start_inference; + item->exec_module.callback = &infer_completion_callback; + item->exec_module.args = item; + + if (ff_safe_queue_push_back(onnx_model->request_queue, item) < 0) { + goto fail; + } + item = NULL; + + // Create task queues + onnx_model->task_queue = ff_queue_create(); + if (!onnx_model->task_queue) { + goto fail; + } + + onnx_model->lltask_queue = ff_queue_create(); + if (!onnx_model->lltask_queue) { + goto fail; + } + + model->get_input = &get_input_onnx; + model->get_output = &get_output_onnx; + model->filter_ctx = filter_ctx; + model->func_type = func_type; + + return model; + +fail: + if (item) { + destroy_request_item(&item); + } + dnn_free_model_onnx(&model); + return NULL; +} + +static int dnn_execute_model_onnx(const DNNModel *model, DNNExecBaseParams *exec_params) +{ + ONNXModel *onnx_model = (ONNXModel *)model; + DnnContext *ctx = onnx_model->ctx; + TaskItem *task; + ONNXRequestItem *request; + int ret = 0; + + ret = ff_check_exec_params(ctx, DNN_ONNX, model->func_type, exec_params); + if (ret != 0) { + av_log(ctx, AV_LOG_ERROR, "Exec parameter checking failed.\n"); + return ret; + } + + task = av_malloc(sizeof(TaskItem)); + if (!task) { + av_log(ctx, AV_LOG_ERROR, "Unable to alloc memory for task item.\n"); + return AVERROR(ENOMEM); + } + + ret = ff_dnn_fill_task(task, exec_params, onnx_model, 0, 1); + if (ret != 0) { + av_freep(&task); + av_log(ctx, AV_LOG_ERROR, "Unable to fill task.\n"); + return ret; + } + + ret = ff_queue_push_back(onnx_model->task_queue, task); + if (ret < 0) { + av_freep(&task); + av_log(ctx, AV_LOG_ERROR, "Unable to push back task_queue.\n"); + return ret; + } + + ret = extract_lltask_from_task(task, onnx_model->lltask_queue); + if (ret != 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to extract last level task from task.\n"); + return ret; + } + + request = (ONNXRequestItem 
*)ff_safe_queue_pop_front(onnx_model->request_queue); + if (!request) { + av_log(ctx, AV_LOG_ERROR, "Unable to get infer request.\n"); + return AVERROR(EINVAL); + } + + return execute_model_onnx(request, onnx_model->lltask_queue); +} + +static DNNAsyncStatusType dnn_get_result_onnx(const DNNModel *model, AVFrame **in, AVFrame **out) +{ + ONNXModel *onnx_model = (ONNXModel *)model; + return ff_dnn_get_result_common(onnx_model->task_queue, in, out); +} + +static int dnn_flush_onnx(const DNNModel *model) +{ + ONNXModel *onnx_model = (ONNXModel *)model; + ONNXRequestItem *request; + + if (ff_queue_size(onnx_model->lltask_queue) == 0) + return 0; + + request = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue); + if (!request) { + av_log(onnx_model->ctx, AV_LOG_ERROR, "Unable to get infer request.\n"); + return AVERROR(EINVAL); + } + + return execute_model_onnx(request, onnx_model->lltask_queue); +} + +const DNNModule ff_dnn_backend_onnx = { + .clazz = DNN_DEFINE_CLASS(dnn_onnx), + .type = DNN_ONNX, + .load_model = dnn_load_model_onnx, + .execute_model = dnn_execute_model_onnx, + .get_result = dnn_get_result_onnx, + .flush = dnn_flush_onnx, + .free_model = dnn_free_model_onnx, +}; diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index c4e410756b..31f22e26b0 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -33,6 +33,7 @@ extern const DNNModule ff_dnn_backend_openvino; extern const DNNModule ff_dnn_backend_tf; extern const DNNModule ff_dnn_backend_torch; +extern const DNNModule ff_dnn_backend_onnx; #define OFFSET(x) offsetof(DnnContext, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM @@ -78,6 +79,9 @@ static const DnnBackendInfo dnn_backend_info_list[] = { #if CONFIG_LIBTORCH {offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch}, #endif +#if CONFIG_LIBONNXRUNTIME + {offsetof(DnnContext, onnx_option), .module = &ff_dnn_backend_onnx}, +#endif }; const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx) diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 66086409be..6aede1f37c 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -35,7 +35,8 @@ typedef enum { DNN_TF = 1, DNN_OV = 1 << 1, - DNN_TH = 1 << 2 + DNN_TH = 1 << 2, + DNN_ONNX = 1 << 3 } DNNBackendType; typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType; @@ -138,6 +139,13 @@ typedef struct THOptions { int optimize; } THOptions; +typedef struct ONNXOptions { + const AVClass *clazz; + int num_threads; + char *execution_provider; // "cpu", "cuda", "dml" (DirectML) + int gpu_device_id; +} ONNXOptions; + typedef struct DNNModule DNNModule; typedef struct DnnContext { @@ -169,6 +177,9 @@ typedef struct DnnContext { #if CONFIG_LIBTORCH THOptions torch_option; #endif +#if CONFIG_LIBONNXRUNTIME + ONNXOptions onnx_option; +#endif } DnnContext; // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. 
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 0771ceb5fc..7ffa700cc5 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -52,11 +52,14 @@ static const AVOption dnn_processing_options[] = {
 #endif
 #if (CONFIG_LIBTORCH == 1)
     { "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
+#endif
+#if (CONFIG_LIBONNXRUNTIME == 1)
+    { "onnx", "onnx backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_ONNX }, 0, 0, FLAGS, "backend" },
 #endif
     { NULL }
 };
 
-AVFILTER_DNN_DEFINE_CLASS(dnn_processing, DNN_TF | DNN_OV | DNN_TH);
+AVFILTER_DNN_DEFINE_CLASS(dnn_processing, DNN_TF | DNN_OV | DNN_TH | DNN_ONNX);
 
 static av_cold int init(AVFilterContext *context)
 {
-- 
2.52.0
