This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new a2856b3c30 avfilter/dnn: add ONNX Runtime backend with GPU execution
provider support
a2856b3c30 is described below
commit a2856b3c3084ea45ca5b94a5daded25f85c41dac
Author: stevxiao <[email protected]>
AuthorDate: Tue Jan 20 15:31:40 2026 -0500
Commit: Guo Yejun <[email protected]>
CommitDate: Mon Jun 22 20:06:40 2026 +0800
avfilter/dnn: add ONNX Runtime backend with GPU execution provider support
This patch adds ONNX Runtime as a new DNN backend for FFmpeg's
dnn_processing
filter, enabling hardware-accelerated neural network inference on multiple
GPU and NPU platforms.
Execution Providers Supported:
- CPU execution provider (default)
- CUDA execution provider (NVIDIA GPUs)
- DirectML execution provider (AMD/Intel/NVIDIA GPUs on Windows)
- VitisAI execution provider (AMD Ryzen AI NPU)
The options for dnn_processing with dnn_backend=onnx:
- device: execution provider — cpu, cuda, dml, or vitisai (default: cpu)
- device_id: GPU device index (default: 0)
- threads_per_operation: inference thread count for CPU EP (default: 0,
auto)
- input: input tensor name. When omitted, the backend resolves it from
the loaded session
- output: output tensor name. When omitted, the backend resolves it from
the loaded session
Example usage:
# CPU inference
ffmpeg -i input.mp4 -vf
"format=rgb24,dnn_processing=dnn_backend=onnx:model=model.onnx:input=image_in:output=image_out"
output.mp4
# CUDA GPU inference
ffmpeg -i input.mp4 -vf
"dnn_processing=dnn_backend=onnx:model=model.onnx:device=cuda:device_id=0"
output.mp4
# DirectML GPU inference (Windows)
ffmpeg -i input.mp4 -vf
"dnn_processing=dnn_backend=onnx:model=model.onnx:device=dml:device_id=0"
output.mp4
# VitisAI NPU inference
ffmpeg -i input.mp4 -vf
"dnn_processing=dnn_backend=onnx:model=model.onnx:device=vitisai" output.mp4
Note: depending on the model, you may need a format filter (e.g.
format=rgb24 or format=grayf32) before dnn_processing to convert the frames to
the pixel format the model's input tensor expects.
Signed-off-by: younengxiao <[email protected]>
Reviewed-by: Guo Yejun <[email protected]>
---
Changelog | 1 +
configure | 5 +-
doc/filters.texi | 46 +-
doc/general_contents.texi | 33 ++
libavfilter/dnn/Makefile | 1 +
libavfilter/dnn/dnn_backend_onnx.c | 1104 ++++++++++++++++++++++++++++++++++++
libavfilter/dnn/dnn_interface.c | 8 +
libavfilter/dnn_filter_common.c | 25 +-
libavfilter/dnn_interface.h | 14 +-
libavfilter/vf_dnn_processing.c | 5 +-
10 files changed, 1234 insertions(+), 8 deletions(-)
diff --git a/Changelog b/Changelog
index 2ad3ee255f..8cfac4c1a7 100644
--- a/Changelog
+++ b/Changelog
@@ -18,6 +18,7 @@ version <next>:
- Remove ogg/celt parsing
- Bitstream filter to split Dolby Vision multi-layer HEVC
- Add AMF hardware memory mapping support.
+- ONNX Runtime DNN backend with GPU execution provider support
version 8.1:
diff --git a/configure b/configure
index a6bbb86807..c8a0eac79b 100755
--- a/configure
+++ b/configure
@@ -253,6 +253,7 @@ External library support:
--enable-libmp3lame enable MP3 encoding via libmp3lame [no]
--enable-libmpeghdec enable MPEG-H 3DA decoding via libmpeghdec [no]
--enable-liboapv enable APV encoding via liboapv [no]
+ --enable-libonnxruntime enable ONNX Runtime as a DNN module backend [no]
--enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb
[no]
--enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]
--enable-libopencv enable video filtering via libopencv [no]
@@ -2108,6 +2109,7 @@ EXTERNAL_LIBRARY_LIST="
libmp3lame
libmysofa
liboapv
+ libonnxruntime
libopencv
libopencolorio
libopenh264
@@ -3089,7 +3091,7 @@ dirac_parse_select="golomb"
dovi_rpudec_select="golomb"
dovi_rpuenc_select="golomb"
dnn_deps="avformat swscale"
-dnn_deps_any="libtensorflow libopenvino libtorch"
+dnn_deps_any="libtensorflow libopenvino libtorch libonnxruntime"
error_resilience_select="me_cmp"
evcparse_select="golomb"
faandct_deps="faan"
@@ -7440,6 +7442,7 @@ enabled libnpp && { test_cpp_condition "$(cd
"$source_path"; pwd)/lib
die "ERROR: libnpp not found"; } &&
{ check_func_headers "nppi.h"
nppiYCbCr420_8u_P2P3R $libnpp_extralibs ||
die "ERROR: libnpp support is deprecated,
version 13.0 and up are not supported"; }
+enabled libonnxruntime && require libonnxruntime onnxruntime_c_api.h
OrtGetApiBase -lonnxruntime
enabled libopencore_amrnb && { check_pkg_config libopencore_amrnb
opencore-amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init ||
require libopencore_amrnb
opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb; }
enabled libopencore_amrwb && { check_pkg_config libopencore_amrwb
opencore-amrwb opencore-amrwb/dec_if.h D_IF_init ||
diff --git a/doc/filters.texi b/doc/filters.texi
index 2cae41c7c5..1a649cf794 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12221,18 +12221,42 @@ and configure FFmpeg with @code{--enable-libtorch
--extra-cflags=-I/libtorch_root/libtorch/include/torch/csrc/api/include
--extra-ldflags=-L/libtorch_root/libtorch/lib/}
+@item onnx
+ONNX Runtime backend. To enable this backend you need to install the
+ONNX Runtime library (see @url{https://onnxruntime.ai/}) and configure
+FFmpeg with @code{--enable-libonnxruntime}.
+
+The current ONNX Runtime backend expects 4-D input and output tensors
+with NCHW layout and 32-bit floating-point element type (ONNX
+@code{FLOAT}); models with integer or other element types (e.g.
+@code{UINT8}) are not supported and will be rejected at load time.
+Models using NHWC layout or other ranks are not yet
+supported. Only single-input models are supported; the backend binds
+exactly one input tensor when running the model.
+
+The @option{input} and @option{output} options are optional for the
+ONNX Runtime backend; when they are omitted the backend resolves the
+tensor names from the session.
+
+The ONNX Runtime backend runs inference synchronously using a single
+inference request. The shared @option{async} and @option{nireq} options
+therefore have no effect for @code{dnn_backend=onnx}; inference always
+runs synchronously regardless of their values.
+
@end table
@item model
Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow, OpenVINO
-and Libtorch backend can load files for only its format.
+Note that different backends use different file formats. TensorFlow, OpenVINO,
+Libtorch, and ONNX Runtime backends can load files for only their respective
formats.
@item input
-Set the input name of the dnn network.
+Set the input name of the dnn network. Required for the TensorFlow backend;
+optional for the ONNX Runtime backend.
@item output
-Set the output name of the dnn network.
+Set the output name of the dnn network. Required for the TensorFlow backend;
+optional for the ONNX Runtime backend.
@item backend_configs
Set the configs to be passed into backend. To use async execution, set async
(default: set).
@@ -12241,6 +12265,20 @@ Roll back to sync execution if the backend does not
support async.
For tensorflow backend, you can set its configs with @option{sess_config}
options,
please use tools/python/tf_sess_config.py to get the configs of TensorFlow
backend for your system.
+@item device
+Set the device to run the model. For the ONNX Runtime backend this selects the
+execution provider: @code{cpu} (default), @code{cuda} (NVIDIA GPU),
+@code{dml} (DirectML, Windows only) or @code{vitisai} (AMD Ryzen AI NPU).
+
+@item device_id
+Set the device index used by GPU execution providers (e.g. @code{cuda} or
+@code{dml}) for the ONNX Runtime backend. Default is 0.
+
+@item threads_per_operation
+ONNX Runtime backend only. Set the number of CPU threads used per ONNX
+Runtime operator when running with @code{device=cpu}. Default is 0 (let
+ONNX Runtime choose automatically). Has no effect for GPU/NPU providers.
+
@end table
@subsection Examples
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 5fed093642..7a1c4f3a21 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -205,6 +205,39 @@ FFmpeg can make use of this library, originating in
Modplug-XMMS, to read from M
See @url{https://github.com/Konstanty/libmodplug}. Pass
@code{--enable-libmodplug} to configure to
enable it.
+@section ONNX Runtime
+
+FFmpeg can make use of the ONNX Runtime library as a backend for DNN based
filters.
+
+Go to @url{https://onnxruntime.ai/} and follow the instructions for installing
+the library. Pre-built packages are available for various platforms.
+
+Configure FFmpeg with ONNX Runtime support:
+
+@example
+./configure \
+ --enable-libonnxruntime \
+ --extra-cflags="-I/path/to/onnxruntime/include" \
+ --extra-ldflags="-L/path/to/onnxruntime/lib"
+@end example
+
+On ELF-based systems (Linux, BSD) you may also want to embed the
+library search path so that the installed @file{ffmpeg} binary finds the
+shared library at run time without setting @env{LD_LIBRARY_PATH}:
+
+@example
+./configure \
+ --enable-libonnxruntime \
+ --extra-cflags="-I/path/to/onnxruntime/include" \
+ --extra-ldflags="-L/path/to/onnxruntime/lib
-Wl,-rpath,/path/to/onnxruntime/lib"
+@end example
+
+The @code{-Wl,-rpath} flag is ELF-specific and should be omitted on
+other platforms (Windows, macOS).
+
+For MinGW GCC builds, add the @code{-D_stdcall=__stdcall} flag for
compatibility
+with ONNX Runtime headers.
+
@section OpenCORE, VisualOn, and Fraunhofer libraries
Spun off Google Android sources, OpenCore, VisualOn and Fraunhofer
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 3d09927c98..7c5d7d8ab6 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -7,5 +7,6 @@ OBJS-$(CONFIG_DNN) +=
dnn/dnn_backend_common.o
DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o
DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o
+DNN-OBJS-$(CONFIG_LIBONNXRUNTIME) += dnn/dnn_backend_onnx.o
OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_onnx.c
b/libavfilter/dnn/dnn_backend_onnx.c
new file mode 100644
index 0000000000..0ff0ffb285
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_onnx.c
@@ -0,0 +1,1104 @@
+/*
+ * Copyright (c) 2026 Advanced Micro Devices, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN ONNX Runtime backend implementation.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+#include "libavutil/avstring.h"
+#include "libavutil/thread.h"
+#include "libavutil/wchar_filename.h"
+#include "../filters.h"
+#include "dnn_io_proc.h"
+#include "dnn_backend_common.h"
+#include "queue.h"
+#include "safe_queue.h"
+#include <onnxruntime_c_api.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * Backend-private model state. The embedded DNNModel is the first member
+ * because the code in this file casts between DNNModel * and ONNXModel *.
+ */
+typedef struct ONNXModel {
+ DNNModel model;
+ DnnContext *ctx;
+ /* env, session and session_options are released in dnn_free_model_onnx() */
+ OrtEnv *env;
+ OrtSession *session;
+ OrtSessionOptions *session_options;
+ /* passed to SessionGet{Input,Output}Name and used to free the returned names */
+ OrtAllocator *allocator;
+ /* reusable ONNXRequestItem objects */
+ SafeQueue *request_queue;
+ /* TaskItem objects; drained in dnn_free_model_onnx() */
+ Queue *task_queue;
+ /* LastLevelTaskItem objects waiting to be bound to a request */
+ Queue *lltask_queue;
+ /* input description cached by get_input_onnx() */
+ DNNData input_info;
+ /* non-zero once input_info is valid */
+ int input_resolved;
+ /* non-zero once the output name has been found in the session */
+ int output_resolved;
+} ONNXModel;
+
+/** Per-inference ORT state; every member is released by onnx_free_request(). */
+typedef struct ONNXInferRequest {
+ /* created over input_data by fill_model_input_onnx() */
+ OrtValue *input_tensor;
+ /* filled in by the OrtApi Run call in onnx_start_inference() */
+ OrtValue *output_tensor;
+ /* av_malloc()ed buffer backing input_tensor */
+ void *input_data;
+} ONNXInferRequest;
+
+/** Pairs an inference request with the low-level task it is currently serving. */
+typedef struct ONNXRequestItem {
+ ONNXInferRequest *infer_request;
+ /* set by fill_model_input_onnx(); freed again after each inference */
+ LastLevelTaskItem *lltask;
+ /* in the code visible here only passed to ff_dnn_async_module_cleanup() */
+ DNNAsyncExecModule exec_module;
+} ONNXRequestItem;
+
+/*
+ * AVOptions specific to the ONNX Runtime backend. threads_per_operation is
+ * stored in ONNXOptions.num_threads; the loader only forwards it to ORT when
+ * it is greater than 0 and the device is the CPU, so the default of 0 leaves
+ * the intra-op thread count untouched.
+ */
+#define OFFSET(x) offsetof(ONNXOptions, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_onnx_options[] = {
+ { "threads_per_operation", "number of CPU threads per ORT operator
(device=cpu only)",
+ OFFSET(num_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0,
INT_MAX, FLAGS },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(dnn_onnx);
+
+/*
+ * Process-wide ORT API table. It is filled in exactly once through
+ * g_ort_init_once; dnn_load_model_onnx() treats a NULL table as a failure.
+ */
+static const OrtApi *g_ort = NULL;
+static AVOnce g_ort_init_once = AV_ONCE_INIT;
+
+/* One-time initializer run via ff_thread_once(): fetch the OrtApi table. */
+static void init_ort_api(void)
+{
+ g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
+}
+
+/*
+ * Evaluate an ORT call; on failure log the ORT error message, release the
+ * status object and jump to the label "err". The expansion site must have a
+ * variable named ctx (used as the av_log context) and an "err" label in scope.
+ */
+#define ORT_ABORT_ON_ERROR(expr) \
+ do { \
+ OrtStatus *status = (expr); \
+ if (status != NULL) { \
+ const char *msg = g_ort->GetErrorMessage(status); \
+ av_log(ctx, AV_LOG_ERROR, "ONNX Runtime error: %s\n", msg); \
+ g_ort->ReleaseStatus(status); \
+ goto err; \
+ } \
+ } while (0)
+
+/**
+ * Wrap a task in a newly allocated LastLevelTaskItem and append it to
+ * lltask_queue. The task is marked as needing exactly one inference.
+ *
+ * @param task          task to schedule; its model must be an ONNXModel
+ * @param lltask_queue  queue that receives the new item
+ * @return 0 on success, AVERROR(ENOMEM) if allocation or queueing fails
+ */
+static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
+{
+ ONNXModel *onnx_model = (ONNXModel *)task->model;
+ DnnContext *ctx = onnx_model->ctx;
+ LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));
+
+ if (!lltask) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for
LastLevelTaskItem\n");
+ return AVERROR(ENOMEM);
+ }
+ /* one low-level inference per task in this backend */
+ task->inference_todo = 1;
+ task->inference_done = 0;
+ lltask->task = task;
+ if (ff_queue_push_back(lltask_queue, lltask) < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to push back lltask_queue.\n");
+ /* the queue did not take the item, so it is still ours to free */
+ av_freep(&lltask);
+ return AVERROR(ENOMEM);
+ }
+ return 0;
+}
+
+/**
+ * Release the ORT values and the input buffer held by a request.
+ * The request object itself is kept so it can be reused; NULL is a no-op.
+ */
+static void onnx_free_request(ONNXInferRequest *request)
+{
+    OrtValue *in, *out;
+
+    if (request == NULL)
+        return;
+
+    /* Detach both tensors first so the request never holds stale pointers. */
+    in  = request->input_tensor;
+    out = request->output_tensor;
+    request->input_tensor  = NULL;
+    request->output_tensor = NULL;
+
+    if (in != NULL)
+        g_ort->ReleaseValue(in);
+    av_freep(&request->input_data);
+    if (out != NULL)
+        g_ort->ReleaseValue(out);
+}
+
+/**
+ * Tear down a request item completely: its ORT state, the inference request,
+ * any attached low-level task, the async module and the item itself.
+ * On return *arg is NULL. A NULL arg or NULL *arg is a no-op.
+ */
+static inline void destroy_request_item(ONNXRequestItem **arg)
+{
+    ONNXRequestItem *item = arg ? *arg : NULL;
+
+    if (item == NULL)
+        return;
+
+    onnx_free_request(item->infer_request);
+    av_freep(&item->infer_request);
+    av_freep(&item->lltask);
+    ff_dnn_async_module_cleanup(&item->exec_module);
+    av_freep(arg);
+}
+
+/**
+ * Free a model created by this backend and set *model to NULL.
+ *
+ * Drains and destroys the request, low-level task and task queues (in that
+ * order), then releases the ORT session, session options and environment.
+ * A NULL model or NULL *model is a no-op.
+ */
+static void dnn_free_model_onnx(DNNModel **model)
+{
+    ONNXModel *om;
+
+    if (model == NULL || *model == NULL)
+        return;
+    om = (ONNXModel *)(*model);
+
+    /* Pending inference requests. */
+    while (ff_safe_queue_size(om->request_queue) != 0) {
+        ONNXRequestItem *req =
+            (ONNXRequestItem *)ff_safe_queue_pop_front(om->request_queue);
+        destroy_request_item(&req);
+    }
+    ff_safe_queue_destroy(om->request_queue);
+
+    /* Low-level tasks that were never bound to a request. */
+    while (ff_queue_size(om->lltask_queue) != 0) {
+        LastLevelTaskItem *pending =
+            (LastLevelTaskItem *)ff_queue_pop_front(om->lltask_queue);
+        av_freep(&pending);
+    }
+    ff_queue_destroy(om->lltask_queue);
+
+    /* Tasks still queued, together with the frames they hold. */
+    while (ff_queue_size(om->task_queue) != 0) {
+        TaskItem *queued = (TaskItem *)ff_queue_pop_front(om->task_queue);
+        av_frame_free(&queued->in_frame);
+        av_frame_free(&queued->out_frame);
+        av_freep(&queued);
+    }
+    ff_queue_destroy(om->task_queue);
+
+    /* ORT objects, in the same order as they are dropped upstream. */
+    if (om->session)
+        g_ort->ReleaseSession(om->session);
+    if (om->session_options)
+        g_ort->ReleaseSessionOptions(om->session_options);
+    if (om->env)
+        g_ort->ReleaseEnv(om->env);
+
+    av_free(om);
+    *model = NULL;
+}
+
+/**
+ * Describe the model input named input_name.
+ *
+ * The description is cached in the model after the first successful call;
+ * later calls return the cached copy without querying the session again
+ * (the name is not looked up again in that case).
+ *
+ * Only 4-D float tensors with a batch dimension of at most 1 are accepted.
+ * Non-positive (dynamic) dimensions are reported as batch 1, channels 3 and
+ * height/width -1.
+ *
+ * @param model       model created by this backend
+ * @param input       receives layout, dims, data type and channel order
+ * @param input_name  name of the input tensor; must be non-empty
+ * @return 0 on success, AVERROR(EINVAL) for a missing or unknown name, a
+ *         non-tensor input or a failed ORT query, AVERROR(ENOSYS) for an
+ *         unsupported rank, batch size or element type
+ */
+static int get_input_onnx(DNNModel *model, DNNData *input, const char *input_name)
+{
+    ONNXModel *onnx_model = (ONNXModel *)model;
+    DnnContext *ctx = onnx_model->ctx;
+    OrtTypeInfo *type_info = NULL;
+    const OrtTensorTypeAndShapeInfo *tensor_info = NULL;
+    ONNXTensorElementDataType tensor_type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+    OrtStatus *status;
+    size_t num_dims = 0;
+    size_t input_count = 0;
+    size_t input_index = 0;
+    int found_input = 0;
+    int64_t dims[4];
+    int ret = AVERROR(EINVAL);
+
+    if (!input_name || !*input_name) {
+        av_log(ctx, AV_LOG_ERROR, "ONNX input name is not specified\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (onnx_model->input_resolved) {
+        *input = onnx_model->input_info;
+        return 0;
+    }
+
+    status = g_ort->SessionGetInputCount(onnx_model->session, &input_count);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to get input count: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        return AVERROR(EINVAL);
+    }
+
+    for (size_t i = 0; i < input_count; i++) {
+        char *name = NULL;
+        status = g_ort->SessionGetInputName(onnx_model->session, i,
+                                            onnx_model->allocator, &name);
+        if (status != NULL) {
+            /* An unreadable name cannot match; keep scanning the others. */
+            g_ort->ReleaseStatus(status);
+            continue;
+        }
+        if (!strcmp(name, input_name)) {
+            input_index = i;
+            found_input = 1;
+        }
+        onnx_model->allocator->Free(onnx_model->allocator, name);
+        if (found_input)
+            break;
+    }
+
+    if (!found_input) {
+        av_log(ctx, AV_LOG_ERROR, "Input name '%s' not found in ONNX model\n",
+               input_name);
+        return AVERROR(EINVAL);
+    }
+
+    status = g_ort->SessionGetInputTypeInfo(onnx_model->session, input_index,
+                                            &type_info);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to get input type info: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        return AVERROR(EINVAL);
+    }
+
+    /* type_info is owned by this function from here on: leave through "end". */
+    status = g_ort->CastTypeInfoToTensorInfo(type_info, &tensor_info);
+    if (status == NULL && tensor_info == NULL) {
+        /* The cast succeeds with a NULL result when the input is not a tensor. */
+        av_log(ctx, AV_LOG_ERROR, "ONNX input '%s' is not a tensor\n", input_name);
+        goto end;
+    }
+    if (status == NULL)
+        status = g_ort->GetDimensionsCount(tensor_info, &num_dims);
+    if (status == NULL)
+        status = g_ort->GetTensorElementType(tensor_info, &tensor_type);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to query input tensor info: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    if (num_dims != 4) {
+        avpriv_report_missing_feature(ctx, "Support for %zu dimensional input", num_dims);
+        ret = AVERROR(ENOSYS);
+        goto end;
+    }
+
+    /* The rank is known to be 4 here, so the fixed-size array is sufficient. */
+    status = g_ort->GetDimensions(tensor_info, dims, num_dims);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to get input dimensions: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    if (dims[0] > 1) {
+        av_log(ctx, AV_LOG_ERROR,
+               "ONNX model has fixed batch size %"PRId64", but the backend "
+               "only supports a batch size of 1\n", dims[0]);
+        ret = AVERROR(ENOSYS);
+        goto end;
+    }
+
+    if (tensor_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported input tensor data type, only float is supported\n");
+        ret = AVERROR(ENOSYS);
+        goto end;
+    }
+
+    /*
+     * The ONNX backend assumes a 4-D NCHW input tensor (the rank check
+     * above already rejects anything else). Dynamic dimensions fall back
+     * to batch 1 / 3 channels and leave height and width open (-1).
+     */
+    input->layout  = DL_NCHW;
+    input->dims[0] = dims[0] > 0 ? dims[0] : 1;
+    input->dims[1] = dims[1] > 0 ? dims[1] : 3;
+    input->dims[2] = dims[2] > 0 ? dims[2] : -1;
+    input->dims[3] = dims[3] > 0 ? dims[3] : -1;
+    input->dt      = DNN_FLOAT;
+
+    /*
+     * The DCO_RGB setting below is only consulted by the dnn_detect and
+     * dnn_classify filters; the dnn_processing path lets the source AVFrame
+     * pixel format determine the tensor channel order, so both RGB24 and
+     * BGR24 inputs work transparently for that flow.
+     */
+    input->order = DCO_RGB;
+
+    onnx_model->input_info = *input;
+    onnx_model->input_resolved = 1;
+    ret = 0;
+
+end:
+    g_ort->ReleaseTypeInfo(type_info);
+    return ret;
+}
+
+/**
+ * Bind the next queued low-level task to a request and build its input
+ * tensor.
+ *
+ * Pops the front item of the model's lltask_queue, stores it in
+ * request->lltask, allocates a float buffer sized from the input description
+ * and the input frame's width/height, fills it through the pre-processing
+ * path selected by the model's function type, and wraps it in an OrtValue.
+ *
+ * On failure the request's ORT state and input buffer are released here;
+ * request->lltask, if already set, is left for the caller to free.
+ *
+ * @param onnx_model model owning the queues
+ * @param request    request item to populate
+ * @return 0 on success, an AVERROR code on failure
+ */
+static int fill_model_input_onnx(ONNXModel *onnx_model, ONNXRequestItem *request)
+{
+    LastLevelTaskItem *lltask = NULL;
+    TaskItem *task = NULL;
+    ONNXInferRequest *infer_request = NULL;
+    DNNData input = { 0 };
+    DnnContext *ctx = onnx_model->ctx;
+    int ret, width_idx, height_idx, channel_idx;
+    int64_t input_shape[4];
+    size_t input_tensor_size;
+    OrtMemoryInfo *memory_info = NULL;
+    OrtStatus *status;
+
+    lltask = (LastLevelTaskItem *)ff_queue_pop_front(onnx_model->lltask_queue);
+    if (!lltask) {
+        ret = AVERROR(EINVAL);
+        goto err;
+    }
+    request->lltask = lltask;
+    task = lltask->task;
+    infer_request = request->infer_request;
+
+    ret = get_input_onnx(&onnx_model->model, &input, task->input_name);
+    if (ret != 0) {
+        goto err;
+    }
+
+    width_idx   = dnn_get_width_idx_by_layout(input.layout);
+    height_idx  = dnn_get_height_idx_by_layout(input.layout);
+    channel_idx = dnn_get_channel_idx_by_layout(input.layout);
+
+    /* The spatial size always follows the frame, not the model description. */
+    input.dims[height_idx] = task->in_frame->height;
+    input.dims[width_idx]  = task->in_frame->width;
+
+    input_shape[0] = input.dims[0];
+    input_shape[1] = input.dims[channel_idx];
+    input_shape[2] = input.dims[height_idx];
+    input_shape[3] = input.dims[width_idx];
+
+    /*
+     * Compute the byte size one factor at a time so that a non-positive
+     * dimension or a size_t overflow is rejected instead of producing a
+     * bogus allocation size.
+     */
+    input_tensor_size = sizeof(float);
+    for (int i = 0; i < 4; i++) {
+        if (input_shape[i] <= 0 ||
+            (uint64_t)input_shape[i] > SIZE_MAX / input_tensor_size) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Invalid ONNX input shape [%"PRId64", %"PRId64", %"PRId64", %"PRId64"]\n",
+                   input_shape[0], input_shape[1], input_shape[2], input_shape[3]);
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+        input_tensor_size *= (size_t)input_shape[i];
+    }
+
+    input.data = av_malloc(input_tensor_size);
+    if (!input.data) {
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+    /* Hand the buffer to the request so onnx_free_request() releases it. */
+    infer_request->input_data = input.data;
+
+    switch (onnx_model->model.func_type) {
+    case DFT_PROCESS_FRAME:
+        input.scale = 255;
+        if (task->do_ioproc) {
+            if (onnx_model->model.frame_pre_proc != NULL) {
+                onnx_model->model.frame_pre_proc(task->in_frame, &input,
+                                                 onnx_model->model.filter_ctx);
+            } else {
+                ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
+            }
+        }
+        break;
+    case DFT_ANALYTICS_DETECT:
+        ff_frame_to_dnn_detect(task->in_frame, &input, ctx);
+        break;
+    default:
+        avpriv_report_missing_feature(ctx, "model function type %d",
+                                      onnx_model->model.func_type);
+        ret = AVERROR(ENOSYS);
+        goto err;
+    }
+
+    status = g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault,
+                                        &memory_info);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to create CPU memory info: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+
+    /* The tensor only references input.data; the buffer must outlive it. */
+    status = g_ort->CreateTensorWithDataAsOrtValue(
+        memory_info, input.data, input_tensor_size,
+        input_shape, 4, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
+        &infer_request->input_tensor);
+
+    g_ort->ReleaseMemoryInfo(memory_info);
+
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to create input tensor: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+
+    return 0;
+
+err:
+    /* infer_request is still NULL if the queue was empty; that is handled. */
+    onnx_free_request(infer_request);
+    return ret;
+}
+
+/**
+ * Run one synchronous inference for the request passed in args.
+ *
+ * Expects request->lltask and infer_request->input_tensor to be set up
+ * already (fill_model_input_onnx() does both). Exactly one input and one
+ * output tensor are bound. The output name is checked against the session
+ * only until it has been found once; afterwards the check is skipped.
+ *
+ * @param args ONNXRequestItem to run
+ * @return 0 on success, an AVERROR code or DNN_GENERIC_ERROR on failure
+ */
+static int onnx_start_inference(void *args)
+{
+ ONNXRequestItem *request = (ONNXRequestItem *)args;
+ ONNXInferRequest *infer_request = NULL;
+ LastLevelTaskItem *lltask = NULL;
+ TaskItem *task = NULL;
+ ONNXModel *onnx_model = NULL;
+ DnnContext *ctx = NULL;
+ OrtStatus *status;
+ const char *input_names[1];
+ const char *output_names[1];
+
+ if (!request) {
+ av_log(NULL, AV_LOG_ERROR, "ONNXRequestItem is NULL\n");
+ return AVERROR(EINVAL);
+ }
+
+ infer_request = request->infer_request;
+ lltask = request->lltask;
+ task = lltask->task;
+ onnx_model = (ONNXModel *)task->model;
+ ctx = onnx_model->ctx;
+
+ /* only output_names[0] is bound below */
+ if (task->nb_output > 1) {
+ avpriv_report_missing_feature(ctx,
+ "Multiple output tensors (%u) for ONNX backend", task->nb_output);
+ return AVERROR(ENOSYS);
+ }
+
+ if (!task->input_name || !task->output_names || !task->output_names[0]) {
+ av_log(ctx, AV_LOG_ERROR,
+ "ONNX backend: input/output tensor name was not resolved at
load time\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (!infer_request->input_tensor) {
+ av_log(ctx, AV_LOG_ERROR, "Input tensor is NULL\n");
+ return DNN_GENERIC_ERROR;
+ }
+
+ /* validate the requested output name against the session, once per model */
+ if (!onnx_model->output_resolved) {
+ size_t output_count = 0;
+ int found_output = 0;
+
+ status = g_ort->SessionGetOutputCount(onnx_model->session,
&output_count);
+ if (status != NULL) {
+ const char *msg = g_ort->GetErrorMessage(status);
+ av_log(ctx, AV_LOG_ERROR, "Failed to get output count: %s\n", msg);
+ g_ort->ReleaseStatus(status);
+ return AVERROR(EINVAL);
+ }
+
+ for (size_t i = 0; i < output_count; i++) {
+ char *name = NULL;
+ status = g_ort->SessionGetOutputName(onnx_model->session, i,
+ onnx_model->allocator, &name);
+ /* a name that cannot be read is skipped rather than treated as fatal */
+ if (status != NULL) {
+ g_ort->ReleaseStatus(status);
+ continue;
+ }
+ if (!strcmp(name, task->output_names[0]))
+ found_output = 1;
+ /* the name was allocated with the model's allocator and is freed with it */
+ onnx_model->allocator->Free(onnx_model->allocator, name);
+ if (found_output)
+ break;
+ }
+
+ if (!found_output) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Output name '%s' not found in ONNX model\n",
+ task->output_names[0]);
+ return AVERROR(EINVAL);
+ }
+
+ onnx_model->output_resolved = 1;
+ }
+
+ input_names[0] = task->input_name;
+ output_names[0] = task->output_names[0];
+
+ /* NULL run options; one input tensor in, one output tensor out */
+ status = g_ort->Run(onnx_model->session, NULL,
+ input_names, (const OrtValue *const
*)&infer_request->input_tensor, 1,
+ output_names, 1, &infer_request->output_tensor);
+
+ if (status != NULL) {
+ const char *msg = g_ort->GetErrorMessage(status);
+ av_log(ctx, AV_LOG_ERROR, "ONNX inference failed: %s\n", msg);
+ g_ort->ReleaseStatus(status);
+ return DNN_GENERIC_ERROR;
+ }
+
+ return 0;
+}
+
+/**
+ * Consume the output tensor of a finished inference and recycle the request.
+ *
+ * The output must be a 4-D float tensor and is interpreted as NCHW. For
+ * DFT_PROCESS_FRAME the data is either converted into task->out_frame
+ * (do_ioproc set) or only the output width/height are stored in it. Other
+ * function types are reported as unsupported here, including
+ * DFT_ANALYTICS_DETECT even though the input side accepts it.
+ *
+ * task->inference_done is incremented only on success, so callers detect a
+ * failure by comparing it with task->inference_todo. In every case the
+ * low-level task is freed, the request's ORT state is released and the
+ * request is pushed back to the model's request queue (or destroyed if that
+ * push fails).
+ *
+ * @param args ONNXRequestItem whose inference has completed
+ */
+static void infer_completion_callback(void *args)
+{
+    ONNXRequestItem *request = (ONNXRequestItem *)args;
+    LastLevelTaskItem *lltask = request->lltask;
+    TaskItem *task = lltask->task;
+    DNNData outputs = { 0 };
+    ONNXInferRequest *infer_request = request->infer_request;
+    ONNXModel *onnx_model = (ONNXModel *)task->model;
+    DnnContext *ctx = onnx_model->ctx;
+    OrtTensorTypeAndShapeInfo *tensor_info = NULL;
+    ONNXTensorElementDataType tensor_type = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+    size_t num_dims = 0;
+    int64_t dims[4];
+    void *output_data = NULL;
+    OrtStatus *status;
+
+    if (!infer_request->output_tensor) {
+        av_log(ctx, AV_LOG_ERROR, "Output tensor is NULL\n");
+        goto end;
+    }
+
+    status = g_ort->GetTensorTypeAndShape(infer_request->output_tensor,
+                                          &tensor_info);
+    if (status != NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get output tensor info\n");
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    status = g_ort->GetDimensionsCount(tensor_info, &num_dims);
+    if (status == NULL)
+        status = g_ort->GetTensorElementType(tensor_info, &tensor_type);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to query output tensor info: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    if (tensor_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported output tensor data type, only float is supported\n");
+        goto end;
+    }
+
+    if (num_dims != 4) {
+        avpriv_report_missing_feature(ctx, "Support for %zu dimensional output", num_dims);
+        goto end;
+    }
+
+    /* The rank is known to be 4 here, so the fixed-size array is sufficient. */
+    status = g_ort->GetDimensions(tensor_info, dims, num_dims);
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to get output dimensions: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    status = g_ort->GetTensorMutableData(infer_request->output_tensor,
+                                         &output_data);
+    if (status != NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get tensor data\n");
+        g_ort->ReleaseStatus(status);
+        goto end;
+    }
+
+    /* Output is interpreted as NCHW, matching the input assumption. */
+    outputs.layout  = DL_NCHW;
+    outputs.order   = DCO_RGB;
+    outputs.dt      = DNN_FLOAT;
+    outputs.dims[0] = dims[0];
+    outputs.dims[1] = dims[1];
+    outputs.dims[2] = dims[2];
+    outputs.dims[3] = dims[3];
+    /* Borrowed from output_tensor; valid until onnx_free_request() below. */
+    outputs.data    = output_data;
+
+    switch (onnx_model->model.func_type) {
+    case DFT_PROCESS_FRAME:
+        if (task->do_ioproc) {
+            outputs.scale = 255;
+            if (onnx_model->model.frame_post_proc != NULL) {
+                onnx_model->model.frame_post_proc(task->out_frame, &outputs,
+                                                  onnx_model->model.filter_ctx);
+            } else {
+                ff_proc_from_dnn_to_frame(task->out_frame, &outputs, ctx);
+            }
+        } else {
+            /* Probe run: only report the output geometry. */
+            task->out_frame->width =
+                outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)];
+            task->out_frame->height =
+                outputs.dims[dnn_get_height_idx_by_layout(outputs.layout)];
+        }
+        break;
+    default:
+        avpriv_report_missing_feature(ctx, "model function type %d",
+                                      onnx_model->model.func_type);
+        goto end;
+    }
+
+    task->inference_done++;
+
+end:
+    if (tensor_info)
+        g_ort->ReleaseTensorTypeAndShapeInfo(tensor_info);
+    av_freep(&request->lltask);
+    onnx_free_request(infer_request);
+    if (ff_safe_queue_push_back(onnx_model->request_queue, request) < 0) {
+        destroy_request_item(&request);
+        av_log(ctx, AV_LOG_ERROR, "Unable to push back request_queue.\n");
+    }
+}
+
+/**
+ * Run the low-level task at the front of lltask_queue using request.
+ *
+ * Ownership of request passes to this function: it is destroyed when the
+ * queue is empty or the front item cannot be read, returned to the model's
+ * request queue by infer_completion_callback() after a completed run, and
+ * returned (or destroyed if that fails) here on any other error.
+ * Async execution is not implemented and yields AVERROR(ENOSYS).
+ *
+ * @return 0 on success or when there was nothing to do, DNN_GENERIC_ERROR if
+ *         the completion step did not finish the task, otherwise the error
+ *         code of the failing step
+ */
+static int execute_model_onnx(ONNXRequestItem *request, Queue *lltask_queue)
+{
+    LastLevelTaskItem *front;
+    ONNXModel *om;
+    TaskItem *task;
+    int ret;
+
+    if (!ff_queue_size(lltask_queue)) {
+        destroy_request_item(&request);
+        return 0;
+    }
+
+    front = (LastLevelTaskItem *)ff_queue_peek_front(lltask_queue);
+    if (!front) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
+        destroy_request_item(&request);
+        return AVERROR(EINVAL);
+    }
+    task = front->task;
+    om   = (ONNXModel *)task->model;
+
+    ret = fill_model_input_onnx(om, request);
+    if (ret == 0) {
+        if (task->async) {
+            avpriv_report_missing_feature(om->ctx, "ONNX async inference");
+            ret = AVERROR(ENOSYS);
+        } else {
+            ret = onnx_start_inference((void *)request);
+        }
+    }
+
+    if (ret == 0) {
+        /* The callback recycles the request; do not touch it afterwards. */
+        infer_completion_callback(request);
+        if (task->inference_done == task->inference_todo)
+            return 0;
+        return DNN_GENERIC_ERROR;
+    }
+
+    /* Failure before completion: release per-run state, recycle the request. */
+    av_freep(&request->lltask);
+    onnx_free_request(request->infer_request);
+    if (ff_safe_queue_push_back(om->request_queue, request) < 0)
+        destroy_request_item(&request);
+    return ret;
+}
+
+/**
+ * Determine the output frame size the model produces for a given input size
+ * by running one inference on a temporary task.
+ *
+ * @param model          model created by this backend
+ * @param input_name     name of the input tensor
+ * @param input_width    width of the probe input
+ * @param input_height   height of the probe input
+ * @param output_name    name of the output tensor
+ * @param output_width   receives the output width; written only on success
+ * @param output_height  receives the output height; written only on success
+ * @return 0 on success, an error code from the failing step otherwise
+ */
+static int get_output_onnx(DNNModel *model, const char *input_name, int input_width, int input_height,
+                           const char *output_name, int *output_width, int *output_height)
+{
+    int ret = 0;
+    ONNXModel *onnx_model = (ONNXModel *)model;
+    DnnContext *ctx = onnx_model->ctx;
+    TaskItem task = { 0 };
+    ONNXRequestItem *request = NULL;
+    DNNExecBaseParams exec_params = {
+        .input_name   = input_name,
+        .output_names = &output_name,
+        .nb_output    = 1,
+        .in_frame     = NULL,
+        .out_frame    = NULL,
+    };
+
+    ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, onnx_model,
+                                         input_height, input_width, ctx);
+    if (ret != 0) {
+        goto err;
+    }
+
+    ret = extract_lltask_from_task(&task, onnx_model->lltask_queue);
+    if (ret != 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to extract last level task from task.\n");
+        goto err;
+    }
+
+    request = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue);
+    if (!request) {
+        /*
+         * The item queued just above points at the stack-local task. Take it
+         * back out of the queue so no dangling pointer survives this call.
+         */
+        LastLevelTaskItem *lltask =
+            (LastLevelTaskItem *)ff_queue_pop_back(onnx_model->lltask_queue);
+        av_freep(&lltask);
+        av_log(ctx, AV_LOG_ERROR, "Unable to get infer request.\n");
+        ret = AVERROR(EINVAL);
+        goto err;
+    }
+
+    ret = execute_model_onnx(request, onnx_model->lltask_queue);
+    if (ret == 0) {
+        /* Only report a size that an actual successful run produced. */
+        *output_width  = task.out_frame->width;
+        *output_height = task.out_frame->height;
+    }
+
+err:
+    av_frame_free(&task.out_frame);
+    av_frame_free(&task.in_frame);
+    return ret;
+}
+
+/**
+ * Allocate an inference request with no tensors and no input buffer attached.
+ *
+ * @return the new request, or NULL if the allocation fails
+ */
+static ONNXInferRequest *onnx_create_inference_request(void)
+{
+    ONNXInferRequest *req = av_malloc(sizeof(*req));
+
+    if (req != NULL) {
+        req->input_data    = NULL;
+        req->input_tensor  = NULL;
+        req->output_tensor = NULL;
+    }
+    return req;
+}
+
+/**
+ * Load an ONNX model and prepare everything needed to run inference on it.
+ *
+ * Creates the ONNX Runtime environment and session options, appends the
+ * execution provider selected by ctx->device (a provider that cannot be
+ * enabled is reported with a warning and loading continues), opens the
+ * session, resolves the input/output tensor names when the user did not
+ * supply them, and sets up the request and task queues.
+ *
+ * Every non-NULL OrtStatus returned by the runtime is released here, on both
+ * the fatal and the non-fatal paths.
+ *
+ * @param ctx        DNN context carrying the model path, device and options
+ * @param func_type  function type stored in the returned model
+ * @param filter_ctx filter context stored in the returned model
+ * @return the loaded model, or NULL on failure
+ */
+static DNNModel *dnn_load_model_onnx(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
+{
+    DNNModel *model = NULL;
+    ONNXModel *onnx_model = NULL;
+    ONNXRequestItem *item = NULL;
+    ONNXOptions *options = &ctx->onnx_option;
+    OrtStatus *status;
+
+    ff_thread_once(&g_ort_init_once, init_ort_api);
+    if (!g_ort) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get ONNX Runtime API\n");
+        return NULL;
+    }
+
+    onnx_model = av_mallocz(sizeof(ONNXModel));
+    if (!onnx_model)
+        return NULL;
+
+    model = &onnx_model->model;
+    onnx_model->ctx = ctx;
+
+    status = g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "FFmpeg", &onnx_model->env);
+    if (status != NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create ONNX Runtime environment\n");
+        /* A non-NULL OrtStatus is owned by the caller and must be released. */
+        g_ort->ReleaseStatus(status);
+        goto fail;
+    }
+
+    status = g_ort->CreateSessionOptions(&onnx_model->session_options);
+    if (status != NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create session options\n");
+        g_ort->ReleaseStatus(status);
+        goto fail;
+    }
+
+    /* The thread-count option is only applied when running on the CPU provider. */
+    if (options->num_threads > 0 &&
+        (!ctx->device || av_strcasecmp(ctx->device, "cpu") == 0)) {
+        status = g_ort->SetIntraOpNumThreads(onnx_model->session_options, options->num_threads);
+        if (status != NULL) {
+            /* Not fatal: keep the runtime's default thread count. */
+            av_log(ctx, AV_LOG_WARNING, "Failed to set intra-op thread count: %s\n",
+                   g_ort->GetErrorMessage(status));
+            g_ort->ReleaseStatus(status);
+        }
+    }
+    status = g_ort->SetSessionGraphOptimizationLevel(onnx_model->session_options, ORT_ENABLE_ALL);
+    if (status != NULL) {
+        /* Not fatal: keep the runtime's default optimization level. */
+        av_log(ctx, AV_LOG_WARNING, "Failed to set graph optimization level: %s\n",
+               g_ort->GetErrorMessage(status));
+        g_ort->ReleaseStatus(status);
+    }
+
+    if (ctx->device && av_strcasecmp(ctx->device, "cpu") != 0) {
+        if (av_strcasecmp(ctx->device, "cuda") == 0) {
+            if (g_ort->SessionOptionsAppendExecutionProvider_CUDA) {
+                OrtCUDAProviderOptions cuda_options;
+                memset(&cuda_options, 0, sizeof(cuda_options));
+                cuda_options.device_id = ctx->device_id;
+
+                status = g_ort->SessionOptionsAppendExecutionProvider_CUDA(
+                    onnx_model->session_options, &cuda_options);
+                if (status != NULL) {
+                    const char *msg = g_ort->GetErrorMessage(status);
+                    av_log(ctx, AV_LOG_WARNING, "Failed to enable CUDA (device %d): %s. Falling back to CPU\n",
+                           ctx->device_id, msg);
+                    g_ort->ReleaseStatus(status);
+                } else {
+                    av_log(ctx, AV_LOG_INFO, "Using CUDA execution provider on device %d\n", ctx->device_id);
+                }
+            } else {
+                av_log(ctx, AV_LOG_WARNING, "CUDA provider function not available in this ONNX Runtime API version. Falling back to CPU\n");
+            }
+        } else if (av_strcasecmp(ctx->device, "dml") == 0) {
+#ifdef _WIN32
+            const char *dml_options_keys[] = {"device_id"};
+            const char *dml_options_values[] = {NULL};
+            char device_id_str[32];
+            snprintf(device_id_str, sizeof(device_id_str), "%d", ctx->device_id);
+            dml_options_values[0] = device_id_str;
+
+            /* DirectML cannot use ORT's memory-pattern optimizer and only
+             * supports sequential execution. */
+            status = g_ort->SetSessionExecutionMode(onnx_model->session_options, ORT_SEQUENTIAL);
+            if (status)
+                g_ort->ReleaseStatus(status);
+            status = g_ort->DisableMemPattern(onnx_model->session_options);
+            if (status)
+                g_ort->ReleaseStatus(status);
+
+            if (g_ort->SessionOptionsAppendExecutionProvider) {
+                status = g_ort->SessionOptionsAppendExecutionProvider(
+                    onnx_model->session_options, "DML",
+                    dml_options_keys, dml_options_values, 1);
+                if (status != NULL) {
+                    const char *msg = g_ort->GetErrorMessage(status);
+                    av_log(ctx, AV_LOG_WARNING, "Failed to enable DirectML (device %d): %s. Falling back to CPU\n",
+                           ctx->device_id, msg);
+                    g_ort->ReleaseStatus(status);
+                } else {
+                    av_log(ctx, AV_LOG_INFO, "Using DirectML execution provider on device %d\n", ctx->device_id);
+                }
+            } else {
+                av_log(ctx, AV_LOG_WARNING, "DirectML provider function not available in this ONNX Runtime API version. Falling back to CPU\n");
+            }
+#else
+            av_log(ctx, AV_LOG_WARNING, "DirectML is only available on Windows. Falling back to CPU\n");
+#endif
+        } else if (av_strcasecmp(ctx->device, "vitisai") == 0) {
+            if (g_ort->SessionOptionsAppendExecutionProvider) {
+                status = g_ort->SessionOptionsAppendExecutionProvider(
+                    onnx_model->session_options, "VitisAI",
+                    NULL, NULL, 0);
+                if (status != NULL) {
+                    const char *msg = g_ort->GetErrorMessage(status);
+                    av_log(ctx, AV_LOG_WARNING,
+                           "Failed to enable VitisAI EP: %s. Falling back to CPU\n", msg);
+                    g_ort->ReleaseStatus(status);
+                } else {
+                    av_log(ctx, AV_LOG_INFO, "Using VitisAI execution provider (AMD Ryzen AI NPU)\n");
+                }
+            } else {
+                av_log(ctx, AV_LOG_WARNING,
+                       "VitisAI provider function not available in this ONNX Runtime API version. Falling back to CPU.\n");
+            }
+        } else {
+#ifdef _WIN32
+            av_log(ctx, AV_LOG_WARNING,
+                   "Unknown device '%s'. Supported: cpu, cuda, dml, vitisai. Using CPU\n",
+                   ctx->device);
+#else
+            av_log(ctx, AV_LOG_WARNING,
+                   "Unknown device '%s'. Supported: cpu, cuda, vitisai. Using CPU\n",
+                   ctx->device);
+#endif
+        }
+    } else {
+        av_log(ctx, AV_LOG_INFO, "Using CPU execution provider\n");
+    }
+
+#ifdef _WIN32
+    {
+        /* On Windows the session is created from a wide-character path. */
+        wchar_t *wfilename = NULL;
+        if (utf8towchar(ctx->model_filename, &wfilename) || !wfilename) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to convert model filename to UTF-16\n");
+            goto fail;
+        }
+
+        status = g_ort->CreateSession(onnx_model->env, wfilename,
+                                      onnx_model->session_options, &onnx_model->session);
+        av_free(wfilename);
+    }
+#else
+    status = g_ort->CreateSession(onnx_model->env, ctx->model_filename,
+                                  onnx_model->session_options, &onnx_model->session);
+#endif
+    if (status != NULL) {
+        const char *msg = g_ort->GetErrorMessage(status);
+        av_log(ctx, AV_LOG_ERROR, "Failed to create ONNX session: %s\n", msg);
+        g_ort->ReleaseStatus(status);
+        goto fail;
+    }
+
+    status = g_ort->GetAllocatorWithDefaultOptions(&onnx_model->allocator);
+    if (status != NULL) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get allocator\n");
+        g_ort->ReleaseStatus(status);
+        goto fail;
+    }
+
+    /*
+     * The ONNX backend binds exactly one input tensor to Run(), so only
+     * single-input models are supported.
+     */
+    {
+        size_t input_count = 0;
+        status = g_ort->SessionGetInputCount(onnx_model->session, &input_count);
+        if (status != NULL) {
+            const char *msg = g_ort->GetErrorMessage(status);
+            av_log(ctx, AV_LOG_ERROR, "Failed to get model input count: %s\n", msg);
+            g_ort->ReleaseStatus(status);
+            goto fail;
+        }
+        if (input_count == 0) {
+            av_log(ctx, AV_LOG_ERROR, "ONNX model exposes no input tensors\n");
+            goto fail;
+        }
+        if (input_count > 1) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "ONNX model exposes %zu input tensors; the ONNX backend "
+                   "supports single-input models only.\n",
+                   input_count);
+            goto fail;
+        }
+    }
+
+    /* Auto-detect the input tensor name when the user did not pass input=NAME. */
+    if (!ctx->model_inputname || !*ctx->model_inputname) {
+        char *name = NULL;
+        status = g_ort->SessionGetInputName(onnx_model->session, 0,
+                                            onnx_model->allocator, &name);
+        if (status != NULL) {
+            const char *msg = g_ort->GetErrorMessage(status);
+            av_log(ctx, AV_LOG_ERROR, "Failed to get model input name: %s\n", msg);
+            g_ort->ReleaseStatus(status);
+            goto fail;
+        }
+        /* Keep our own copy; the runtime-allocated string goes back to its allocator. */
+        av_freep(&ctx->model_inputname);
+        ctx->model_inputname = av_strdup(name);
+        onnx_model->allocator->Free(onnx_model->allocator, name);
+        if (!ctx->model_inputname)
+            goto fail;
+        av_log(ctx, AV_LOG_INFO, "Auto-detected ONNX input tensor '%s'\n",
+               ctx->model_inputname);
+    }
+
+    /* Auto-detect the output tensor name when the user did not pass output=NAME. */
+    if (!ctx->model_outputnames) {
+        size_t output_count = 0;
+        char *name = NULL;
+        status = g_ort->SessionGetOutputCount(onnx_model->session, &output_count);
+        if (status != NULL) {
+            const char *msg = g_ort->GetErrorMessage(status);
+            av_log(ctx, AV_LOG_ERROR, "Failed to get model output count: %s\n", msg);
+            g_ort->ReleaseStatus(status);
+            goto fail;
+        }
+        if (output_count == 0) {
+            av_log(ctx, AV_LOG_ERROR, "ONNX model exposes no output tensors\n");
+            goto fail;
+        }
+        status = g_ort->SessionGetOutputName(onnx_model->session, 0,
+                                             onnx_model->allocator, &name);
+        if (status != NULL) {
+            const char *msg = g_ort->GetErrorMessage(status);
+            av_log(ctx, AV_LOG_ERROR, "Failed to get model output name: %s\n", msg);
+            g_ort->ReleaseStatus(status);
+            goto fail;
+        }
+        ctx->model_outputnames = av_calloc(1, sizeof(*ctx->model_outputnames));
+        if (!ctx->model_outputnames) {
+            onnx_model->allocator->Free(onnx_model->allocator, name);
+            goto fail;
+        }
+        ctx->model_outputnames[0] = av_strdup(name);
+        onnx_model->allocator->Free(onnx_model->allocator, name);
+        if (!ctx->model_outputnames[0]) {
+            av_freep(&ctx->model_outputnames);
+            goto fail;
+        }
+        ctx->nb_outputs = 1;
+        if (output_count == 1) {
+            av_log(ctx, AV_LOG_INFO, "Auto-detected ONNX output tensor '%s'\n",
+                   ctx->model_outputnames[0]);
+        } else {
+            av_log(ctx, AV_LOG_WARNING,
+                   "ONNX model exposes %zu output tensors; auto-using index 0 ('%s'). "
+                   "Specify output=NAME to choose a different one.\n",
+                   output_count, ctx->model_outputnames[0]);
+        }
+    }
+
+    onnx_model->request_queue = ff_safe_queue_create();
+    if (!onnx_model->request_queue) {
+        goto fail;
+    }
+
+    item = av_mallocz(sizeof(ONNXRequestItem));
+    if (!item) {
+        goto fail;
+    }
+    item->lltask = NULL;
+    item->infer_request = onnx_create_inference_request();
+    if (!item->infer_request) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for ONNX inference request\n");
+        goto fail;
+    }
+    item->exec_module.start_inference = &onnx_start_inference;
+    item->exec_module.callback = &infer_completion_callback;
+    item->exec_module.args = item;
+
+    if (ff_safe_queue_push_back(onnx_model->request_queue, item) < 0) {
+        goto fail;
+    }
+    /* The queue holds the item now; clear the local so the fail path does not destroy it. */
+    item = NULL;
+
+    onnx_model->task_queue = ff_queue_create();
+    if (!onnx_model->task_queue) {
+        goto fail;
+    }
+
+    onnx_model->lltask_queue = ff_queue_create();
+    if (!onnx_model->lltask_queue) {
+        goto fail;
+    }
+
+    model->get_input = &get_input_onnx;
+    model->get_output = &get_output_onnx;
+    model->filter_ctx = filter_ctx;
+    model->func_type = func_type;
+
+    return model;
+
+fail:
+    if (item) {
+        destroy_request_item(&item);
+    }
+    dnn_free_model_onnx(&model);
+    return NULL;
+}
+
+/**
+ * Validate the execution parameters, queue a new task for them and hand the
+ * pending low-level tasks to execute_model_onnx() on a request item taken
+ * from the request queue.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int dnn_execute_model_onnx(const DNNModel *model, DNNExecBaseParams *exec_params)
+{
+    ONNXModel *onnx_model = (ONNXModel *)model;
+    DnnContext *ctx = onnx_model->ctx;
+    ONNXRequestItem *req_item;
+    TaskItem *new_task;
+    int err;
+
+    err = ff_check_exec_params(ctx, DNN_ONNX, model->func_type, exec_params);
+    if (err != 0) {
+        av_log(ctx, AV_LOG_ERROR, "Exec parameter checking failed.\n");
+        return err;
+    }
+
+    new_task = av_malloc(sizeof(*new_task));
+    if (!new_task) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to alloc memory for task item.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    err = ff_dnn_fill_task(new_task, exec_params, onnx_model, 0, 1);
+    if (err != 0) {
+        av_freep(&new_task);
+        av_log(ctx, AV_LOG_ERROR, "Unable to fill task.\n");
+        return err;
+    }
+
+    err = ff_queue_push_back(onnx_model->task_queue, new_task);
+    if (err < 0) {
+        av_freep(&new_task);
+        av_log(ctx, AV_LOG_ERROR, "Unable to push back task_queue.\n");
+        return err;
+    }
+
+    /* From here on the task sits in task_queue, so it is not freed on error. */
+    err = extract_lltask_from_task(new_task, onnx_model->lltask_queue);
+    if (err != 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to extract last level task from task.\n");
+        return err;
+    }
+
+    req_item = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue);
+    if (req_item)
+        return execute_model_onnx(req_item, onnx_model->lltask_queue);
+
+    av_log(ctx, AV_LOG_ERROR, "Unable to get infer request.\n");
+    return AVERROR(EINVAL);
+}
+
+/** Fetch the next result by delegating to ff_dnn_get_result_common() on the model's task queue. */
+static DNNAsyncStatusType dnn_get_result_onnx(const DNNModel *model, AVFrame **in, AVFrame **out)
+{
+    return ff_dnn_get_result_common(((ONNXModel *)model)->task_queue, in, out);
+}
+
+/**
+ * Run whatever is still waiting in the low-level task queue.
+ *
+ * @return 0 when nothing is pending, otherwise the result of
+ *         execute_model_onnx(), or AVERROR(EINVAL) if no request item is
+ *         available
+ */
+static int dnn_flush_onnx(const DNNModel *model)
+{
+    ONNXModel *onnx_model = (ONNXModel *)model;
+    ONNXRequestItem *req_item;
+
+    if (ff_queue_size(onnx_model->lltask_queue) != 0) {
+        req_item = (ONNXRequestItem *)ff_safe_queue_pop_front(onnx_model->request_queue);
+        if (req_item)
+            return execute_model_onnx(req_item, onnx_model->lltask_queue);
+
+        av_log(onnx_model->ctx, AV_LOG_ERROR, "Unable to get infer request.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/* Module descriptor wiring the ONNX entry points defined in this file into DNNModule. */
+const DNNModule ff_dnn_backend_onnx = {
+    .clazz          = DNN_DEFINE_CLASS(dnn_onnx),
+    .type           = DNN_ONNX,
+    .load_model     = dnn_load_model_onnx,
+    .execute_model  = dnn_execute_model_onnx,
+    .get_result     = dnn_get_result_onnx,
+    .flush          = dnn_flush_onnx,
+    .free_model     = dnn_free_model_onnx,
+};
diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index 7080ab12e4..010677dd81 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -33,6 +33,9 @@
extern const DNNModule ff_dnn_backend_openvino;
extern const DNNModule ff_dnn_backend_tf;
extern const DNNModule ff_dnn_backend_torch;
+#if CONFIG_LIBONNXRUNTIME
+extern const DNNModule ff_dnn_backend_onnx;
+#endif
#define OFFSET(x) offsetof(DnnContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
@@ -53,6 +56,8 @@ static const AVOption dnn_base_options[] = {
OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
{"device", "device to run model",
OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0,
FLAGS},
+ {"device_id", "device ID to run model",
+ OFFSET(device_id), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX,
FLAGS},
{NULL}
};
@@ -78,6 +83,9 @@ static const DnnBackendInfo dnn_backend_info_list[] = {
#if CONFIG_LIBTORCH
{offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch},
#endif
+#if CONFIG_LIBONNXRUNTIME
+ {offsetof(DnnContext, onnx_option), .module = &ff_dnn_backend_onnx},
+#endif
};
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 6b9c6f8d7f..73c5e6b33c 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -31,12 +31,15 @@ static char **separate_output_names(const char *expr, const
char *val_sep, int *
return NULL;
}
- parsed_vals = av_calloc(MAX_SUPPORTED_OUTPUTS_NB, sizeof(*parsed_vals));
+ parsed_vals = av_calloc(MAX_SUPPORTED_OUTPUTS_NB + 1,
sizeof(*parsed_vals));
if (!parsed_vals) {
return NULL;
}
do {
+ if (val_num >= MAX_SUPPORTED_OUTPUTS_NB) {
+ goto err;
+ }
val = av_get_token(&expr, val_sep);
if(val) {
parsed_vals[val_num] = val;
@@ -51,6 +54,12 @@ static char **separate_output_names(const char *expr, const
char *val_sep, int *
*separated_nb = val_num;
return parsed_vals;
+
+err:
+ for (int i = 0; i < val_num; i++)
+ av_free(parsed_vals[i]);
+ av_freep(&parsed_vals);
+ return NULL;
}
typedef struct DnnFilterBase {
@@ -97,6 +106,20 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type,
AVFilterContext *fil
av_log(filter_ctx, AV_LOG_ERROR, "could not parse model output
names\n");
return AVERROR(EINVAL);
}
+ } else if (backend == DNN_ONNX) {
+ /* ONNX: input and output tensor names are optional.*/
+ if (ctx->model_outputnames_string) {
+ ctx->model_outputnames =
separate_output_names(ctx->model_outputnames_string, "&", &ctx->nb_outputs);
+ if (!ctx->model_outputnames) {
+ av_log(filter_ctx, AV_LOG_ERROR, "could not parse model output
names\n");
+ return AVERROR(EINVAL);
+ }
+ if (ctx->nb_outputs != 1) {
+ av_log(filter_ctx, AV_LOG_ERROR,
+ "ONNX backend supports a single output name only\n");
+ return AVERROR(EINVAL);
+ }
+ }
}
ctx->dnn_module = ff_get_dnn_module(ctx->backend_type, filter_ctx);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 66086409be..69a8b0a669 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -35,7 +35,8 @@
typedef enum {
DNN_TF = 1,
DNN_OV = 1 << 1,
- DNN_TH = 1 << 2
+ DNN_TH = 1 << 2,
+ DNN_ONNX = 1 << 3
} DNNBackendType;
typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
@@ -138,6 +139,13 @@ typedef struct THOptions {
int optimize;
} THOptions;
+#if CONFIG_LIBONNXRUNTIME
+typedef struct ONNXOptions { /* Options specific to the ONNX backend. */
+ const AVClass *clazz;
+ int num_threads; /* Passed to SetIntraOpNumThreads() when > 0 and the device is unset or "cpu". */
+} ONNXOptions;
+#endif
+
typedef struct DNNModule DNNModule;
typedef struct DnnContext {
@@ -158,6 +166,7 @@ typedef struct DnnContext {
int nireq;
char *device;
+ int device_id;
#if CONFIG_LIBTENSORFLOW
TFOptions tf_option;
@@ -169,6 +178,9 @@ typedef struct DnnContext {
#if CONFIG_LIBTORCH
THOptions torch_option;
#endif
+#if CONFIG_LIBONNXRUNTIME
+ ONNXOptions onnx_option;
+#endif
} DnnContext;
// Stores pointers to functions for loading, executing, freeing DNN models for
one of the backends.
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 0771ceb5fc..7ffa700cc5 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -52,11 +52,14 @@ static const AVOption dnn_processing_options[] = {
#endif
#if (CONFIG_LIBTORCH == 1)
{ "torch", "torch backend flag", 0,
AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
+#endif
+#if (CONFIG_LIBONNXRUNTIME == 1)
+ { "onnx", "onnx backend flag", 0,
AV_OPT_TYPE_CONST, { .i64 = DNN_ONNX }, 0, 0, FLAGS, "backend" },
#endif
{ NULL }
};
-AVFILTER_DNN_DEFINE_CLASS(dnn_processing, DNN_TF | DNN_OV | DNN_TH);
+AVFILTER_DNN_DEFINE_CLASS(dnn_processing, DNN_TF | DNN_OV | DNN_TH | DNN_ONNX);
static av_cold int init(AVFilterContext *context)
{
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]