On Sun, 19 Feb 2017 18:46:40 +0000
Mark Thompson <[email protected]> wrote:
> ---
> configure | 3 +
> doc/APIchanges | 4 +
> libavutil/Makefile | 2 +
> libavutil/hwcontext.c | 3 +
> libavutil/hwcontext.h | 1 +
> libavutil/hwcontext_internal.h | 1 +
> libavutil/hwcontext_opencl.c | 983
> +++++++++++++++++++++++++++++++++++++++++
> libavutil/hwcontext_opencl.h | 78 ++++
> libavutil/version.h | 2 +-
> 9 files changed, 1076 insertions(+), 1 deletion(-)
> create mode 100644 libavutil/hwcontext_opencl.c
> create mode 100644 libavutil/hwcontext_opencl.h
>
> diff --git a/configure b/configure
> index 4635b73b0..5b78131ab 100755
> --- a/configure
> +++ b/configure
> @@ -244,6 +244,7 @@ External library support:
> --enable-nvenc Nvidia video encoding
> --enable-omx OpenMAX IL
> --enable-omx-rpi OpenMAX IL for Raspberry Pi
> + --enable-opencl OpenCL processing
> --enable-vaapi Video Acceleration API (mainly Unix/Intel)
> --enable-vda Apple Video Decode Acceleration [auto]
> --enable-vdpau Nvidia Video Decode and Presentation API for Unix [auto]
> @@ -1267,6 +1268,7 @@ HWACCEL_LIBRARY_LIST="
> mmal
> nvenc
> omx
> + opencl
> vaapi
> vda
> vdpau
> @@ -4733,6 +4735,7 @@ enabled omx_rpi && { check_header OMX_Core.h
> ||
> { ! enabled cross_compile && add_cflags
> -isystem/opt/vc/include/IL && check_header OMX_Core.h ; } ||
> die "ERROR: OpenMAX IL headers not found"; }
> enabled omx && require_header OMX_Core.h
> +enabled opencl && require OpenCL CL/cl.h clGetPlatformIDs -lOpenCL
> enabled openssl && { { check_pkg_config openssl openssl/ssl.h
> OPENSSL_init_ssl ||
> check_pkg_config openssl openssl/ssl.h
> SSL_library_init; } && {
> add_cflags $openssl_cflags && add_extralibs
> $openssl_extralibs; } ||
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 253454358..208ddd318 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -13,6 +13,10 @@ libavutil: 2015-08-28
>
> API changes, most recent first:
>
> +2017-xx-xx - xxxxxxx - lavu 55.34.0 - hwcontext.h hwcontext_opencl.h
> + Add AV_HWDEVICE_TYPE_OPENCL and a new installed header with
> + OpenCL-specific hwcontext definitions.
> +
> 2017-xx-xx - xxxxxxx - lavu 55.33.0 - pixfmt.h
> Add AV_PIX_FMT_OPENCL.
>
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 60e180c79..49e84854a 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -115,6 +115,7 @@ OBJS-$(CONFIG_CUDA) +=
> hwcontext_cuda.o
> OBJS-$(CONFIG_DXVA2) += hwcontext_dxva2.o
> OBJS-$(CONFIG_LIBMFX) += hwcontext_qsv.o
> OBJS-$(CONFIG_LZO) += lzo.o
> +OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o
> OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o
> OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
>
> @@ -123,6 +124,7 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
> SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda.h
> SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h
> SKIPHEADERS-$(CONFIG_LIBMFX) += hwcontext_qsv.h
> +SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h
> SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h
> SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
>
> diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
> index e4087a15c..27aab3086 100644
> --- a/libavutil/hwcontext.c
> +++ b/libavutil/hwcontext.c
> @@ -45,6 +45,9 @@ static const HWContextType * const hw_table[] = {
> #if CONFIG_VDPAU
> &ff_hwcontext_type_vdpau,
> #endif
> +#if CONFIG_OPENCL
> + &ff_hwcontext_type_opencl,
> +#endif
> NULL,
> };
>
> diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
> index b81a833b2..752c09e6f 100644
> --- a/libavutil/hwcontext.h
> +++ b/libavutil/hwcontext.h
> @@ -30,6 +30,7 @@ enum AVHWDeviceType {
> AV_HWDEVICE_TYPE_VAAPI,
> AV_HWDEVICE_TYPE_DXVA2,
> AV_HWDEVICE_TYPE_QSV,
> + AV_HWDEVICE_TYPE_OPENCL,
> };
>
> typedef struct AVHWDeviceInternal AVHWDeviceInternal;
> diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
> index 1ac0f1f95..ecda70558 100644
> --- a/libavutil/hwcontext_internal.h
> +++ b/libavutil/hwcontext_internal.h
> @@ -159,6 +159,7 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref,
>
> extern const HWContextType ff_hwcontext_type_cuda;
> extern const HWContextType ff_hwcontext_type_dxva2;
> +extern const HWContextType ff_hwcontext_type_opencl;
> extern const HWContextType ff_hwcontext_type_qsv;
> extern const HWContextType ff_hwcontext_type_vaapi;
> extern const HWContextType ff_hwcontext_type_vdpau;
> diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
> new file mode 100644
> index 000000000..0b108b7fc
> --- /dev/null
> +++ b/libavutil/hwcontext_opencl.c
> @@ -0,0 +1,983 @@
> +/*
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include <string.h>
> +
> +#include "config.h"
> +
> +#include "avassert.h"
> +#include "avstring.h"
> +#include "common.h"
> +#include "hwcontext.h"
> +#include "hwcontext_internal.h"
> +#include "hwcontext_opencl.h"
> +#include "mem.h"
> +#include "pixdesc.h"
> +
> +// The maximum number of planes in an image. This must be strictly
> +// less than AV_NUM_DATA_POINTERS because we place the whole-frame
> +// reference in a buffer entry after the final plane. For now, four
> +// is sufficient for any format we can sensibly support.
> +#define MAX_PLANES 4
> +
> +
> +typedef struct OpenCLDeviceContext {
> + // Internal command queue used for transfer/mapping operations
> + // if the user does not supply one themselves.
> + int internal_command_queue;
> + cl_command_queue command_queue;
> +
> + // Platform/device-specific functions.
> +} OpenCLDeviceContext;
> +
> +static void opencl_error_callback(const char *errinfo,
> + const void *private_info, size_t cb,
> + void *user_data)
> +{
> + AVHWDeviceContext *ctx = user_data;
> + av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo);
> +}
> +
> +static void opencl_device_free(AVHWDeviceContext *ctx)
> +{
> + AVOpenCLDeviceContext *hwctx = ctx->hwctx;
> + cl_int cle;
> +
> + cle = clReleaseContext(hwctx->context);
> + if (cle != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to release OpenCL "
> + "context: %d.\n", cle);
> + }
> +}
> +
> +static struct {
> + const char *key;
> + cl_platform_info name;
> +} opencl_platform_params[] = {
> + { "platform_version", CL_PLATFORM_VERSION },
> + { "platform_name", CL_PLATFORM_NAME },
> + { "platform_vendor", CL_PLATFORM_VENDOR },
> + { "platform_extensions", CL_PLATFORM_EXTENSIONS },
> +};
> +
> +static struct {
> + const char *key;
> + cl_device_info name;
> +} opencl_device_params[] = {
> + { "device_name", CL_DEVICE_NAME },
> + { "device_vendor", CL_DEVICE_VENDOR },
> + { "device_version", CL_DEVICE_VERSION },
> + { "device_extensions", CL_DEVICE_EXTENSIONS },
> + { "driver_version", CL_DRIVER_VERSION },
> +};
> +
> +static struct {
> + const char *key;
> + cl_device_type type;
> +} opencl_device_types[] = {
> + { "cpu", CL_DEVICE_TYPE_CPU },
> + { "gpu", CL_DEVICE_TYPE_GPU },
> + { "accelerator", CL_DEVICE_TYPE_ACCELERATOR },
> + { "custom", CL_DEVICE_TYPE_CUSTOM },
> + { "all", CL_DEVICE_TYPE_ALL },
> +};
> +
> +static int opencl_device_create_internal(AVHWDeviceContext *ctx,
> + const char *device,
> + AVDictionary *opts, int flags,
> + cl_context_properties *props)
> +{
> + cl_uint nb_platforms;
> + cl_platform_id *platforms = NULL;
> + cl_uint nb_devices;
> + cl_device_id *devices = NULL;
> + cl_device_type device_type;
> + AVOpenCLDeviceContext *hwctx = ctx->hwctx;
> + cl_int cle;
> + const AVDictionaryEntry *param;
> + char tmp[2048];
> + size_t size;
> + int ret, found, p, d, i;
> +
> + cle = clGetPlatformIDs(0, NULL, &nb_platforms);
> + if (cle != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to get number of "
> + "OpenCL platforms: %d.\n", cle);
> + ret = AVERROR(ENOSYS);
> + goto fail;
> + }
> + av_log(ctx, AV_LOG_VERBOSE, "%d OpenCL platforms found.\n",
> + nb_platforms);
> +
> + platforms = av_malloc_array(nb_platforms, sizeof(*platforms));
> + if (!platforms) {
> + ret = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + cle = clGetPlatformIDs(nb_platforms, platforms, NULL);
> + if (cle != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to get list of OpenCL "
> + "platforms: %d.\n", cle);
> + ret = AVERROR(ENOSYS);
> + goto fail;
> + }
> +
> + param = av_dict_get(opts, "device_type", NULL, 0);
> + if (param) {
> + device_type = 0;
> + for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_types); i++) {
> + if (!strcmp(opencl_device_types[i].key, param->value)) {
> + device_type = opencl_device_types[i].type;
> + break;
> + }
> + }
> + if (!device_type) {
> + av_log(ctx, AV_LOG_ERROR, "Unknown device type %s.\n",
> + param->value);
> + ret = AVERROR(EINVAL);
> + goto fail;
> + }
> + } else {
> + device_type = CL_DEVICE_TYPE_DEFAULT;
> + }
> +
> + found = 0;
> + for (p = 0; p < nb_platforms; p++) {
> + int match = 1;
> + for (i = 0; i < FF_ARRAY_ELEMS(opencl_platform_params); i++) {
> + param = av_dict_get(opts, opencl_platform_params[i].key,
> + NULL, 0);
> + if (!param)
> + continue;
> + cle = clGetPlatformInfo(platforms[p],
> + opencl_platform_params[i].name,
> + sizeof(tmp), tmp, &size);
> + if (!av_stristr(tmp, param->value))
> + match = 0;
> + }
> + if (!match)
> + continue;
> +
> + cle = clGetDeviceIDs(platforms[p], device_type,
> + 0, NULL, &nb_devices);
> + if (cle == CL_DEVICE_NOT_FOUND) {
> + av_log(ctx, AV_LOG_VERBOSE, "No devices of type %lu found "
> + "on platform %d.\n", (unsigned long)device_type, cle);
> + ret = AVERROR(ENOSYS);
> + goto fail;
> + } else if (cle != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to get number of "
> + "devices on platform %d: %d.\n", p, cle);
> + ret = AVERROR(ENOSYS);
> + goto fail;
> + }
> + av_log(ctx, AV_LOG_VERBOSE, "%d OpenCL devices found on "
> + "platform %d.\n", nb_devices, p);
> +
> + devices = av_malloc_array(nb_devices, sizeof(*devices));
> + if (!devices) {
> + ret = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + cle = clGetDeviceIDs(platforms[p], device_type,
> + nb_devices, devices, NULL);
> + if (cle != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to get list of devices "
> + "on platform %d: %d.\n", p, cle);
> + ret = AVERROR(ENODEV);
> + goto fail;
> + }
> +
> + for (d = 0; d < nb_devices; d++) {
> + match = 1;
> + for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_params); i++) {
> + param = av_dict_get(opts, opencl_device_params[i].key,
> + NULL, 0);
> + if (!param)
> + continue;
> + cle = clGetDeviceInfo(devices[d],
> + opencl_device_params[i].name,
> + sizeof(tmp), tmp, &size);
> + if (!av_stristr(tmp, param->value))
> + match = 0;
> + }
> + if (!match)
> + continue;
> +
> + av_log(ctx, AV_LOG_VERBOSE, "Matched platform %d "
> + "device %d.\n", p, d);
> + ++found;
> + memcpy(&hwctx->platform_id, platforms + p, sizeof(*platforms));
> + memcpy(&hwctx->device_id, devices + d, sizeof(*devices));
> + }
> +
> + av_freep(&devices);
> + }
> +
> + if (found == 0) {
> + av_log(ctx, AV_LOG_ERROR, "No matching devices found.\n");
> + ret = AVERROR(ENODEV);
> + goto fail;
> + }
> + if (found > 1) {
> + av_log(ctx, AV_LOG_ERROR, "More than one matching device found.\n");
> + ret = AVERROR(ENODEV);
> + goto fail;
> + }
> +
> + hwctx->context = clCreateContext(props, 1, &hwctx->device_id,
> + &opencl_error_callback, ctx,
> + &cle);
> + if (!hwctx->context) {
> + av_log(ctx, AV_LOG_ERROR, "Failed to create OpenCL context: "
> + "%d.\n", cle);
> + ret = AVERROR(ENODEV);
> + goto fail;
> + }
> +
> + ctx->free = &opencl_device_free;
> +
> + ret = 0;
> +fail:
> + av_freep(&platforms);
> + av_freep(&devices);
> + return ret;
> +}
> +
> +static int opencl_device_init(AVHWDeviceContext *hwdev)
> +{
> + AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
> + OpenCLDeviceContext *ctx = hwdev->internal->priv;
> + cl_int cle;
> +
> + if (!hwctx->command_queue) {
> + ctx->internal_command_queue = 1;
> + ctx->command_queue = clCreateCommandQueue(hwctx->context,
> + hwctx->device_id,
> + 0, &cle);
> + if (!ctx->command_queue) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to create internal "
> + "command queue: %d.\n", cle);
> + return AVERROR(EIO);
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int opencl_device_create(AVHWDeviceContext *ctx, const char *device,
> + AVDictionary *opts, int flags)
> +{
> + return opencl_device_create_internal(ctx, device, opts, flags, NULL);
> +}
> +
> +static void opencl_device_uninit(AVHWDeviceContext *hwdev)
> +{
> + OpenCLDeviceContext *ctx = hwdev->internal->priv;
> + cl_int cle;
> +
> + if (ctx->internal_command_queue) {
> + cle = clReleaseCommandQueue(ctx->command_queue);
> + if (cle != CL_SUCCESS) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to release internal "
> + "command queue: %d.\n", cle);
> + }
> + }
> +}
> +
> +static int opencl_get_plane_format(enum AVPixelFormat pixfmt,
> + int plane, int width, int height,
> + cl_image_format *image_format,
> + cl_image_desc *image_desc)
> +{
> + av_assert0(image_format && image_desc);
> +
> + memset(image_format, 0, sizeof(*image_format));
> + memset(image_desc, 0, sizeof(*image_desc));
> + image_desc->image_type = CL_MEM_OBJECT_IMAGE2D;
> +
> + switch (pixfmt) {
> + case AV_PIX_FMT_YUV420P:
> + if (width % 2)
> + return AVERROR(EINVAL);
> + if (plane > 2)
> + return AVERROR(ENOENT);
> + image_format->image_channel_order = CL_R;
> + image_format->image_channel_data_type = CL_UNORM_INT8;
> + image_desc->image_width = width / (1 + (plane > 0));
> + image_desc->image_height = height / (1 + (plane > 0));
> + image_desc->image_row_pitch = image_desc->image_width;
> + break;
> +
> + case AV_PIX_FMT_NV12:
> + if (width % 2)
> + return AVERROR(EINVAL);
> + if (plane > 1)
> + return AVERROR(ENOENT);
> + image_format->image_channel_order = plane ? CL_RG : CL_R;
> + image_format->image_channel_data_type = CL_UNORM_INT8;
> + image_desc->image_width = width / (1 + plane);
> + image_desc->image_height = height / (1 + plane);
> + image_desc->image_row_pitch = width;
> + break;
> +
> + case AV_PIX_FMT_P010:
> + if (width % 2)
> + return AVERROR(EINVAL);
> + if (plane > 1)
> + return AVERROR(ENOENT);
> + image_format->image_channel_order = plane ? CL_RG : CL_R;
> + image_format->image_channel_data_type = CL_UNORM_INT16;
> + image_desc->image_width = width / (1 + plane);
> + image_desc->image_height = height / (1 + plane);
> + image_desc->image_row_pitch = 2 * width;
> + break;
> +
> + case AV_PIX_FMT_RGBA:
> + image_format->image_channel_order = CL_RGBA;
> + if (0)
> + case AV_PIX_FMT_BGRA:
> + image_format->image_channel_order = CL_BGRA;
> + if (0)
> + case AV_PIX_FMT_ARGB:
> + image_format->image_channel_order = CL_ARGB;
> +#ifdef CL_ABGR
> + if (0)
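wut (the "if (0)" before each case label only skips the following
assignment when execution falls through from the previous case; this
trick deserves an explanatory comment or a plainer construct)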
> + case AV_PIX_FMT_ABGR:
> + image_format->image_channel_order = CL_ABGR;
> +#endif
> + if (plane > 0)
> + return AVERROR(ENOENT);
> + image_format->image_channel_data_type = CL_UNORM_INT8;
> + image_desc->image_width = width;
> + image_desc->image_height = height;
> + image_desc->image_row_pitch = 4 * width;
> + break;
> +
> + default:
> + return AVERROR(EINVAL);
> + }
> +
> + return 0;
> +}
> +
> +static int opencl_frames_get_constraints(AVHWDeviceContext *hwdev,
> + const void *hwconfig,
> + AVHWFramesConstraints *constraints)
> +{
> + AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
> + cl_uint nb_image_formats;
> + cl_image_format *image_formats = NULL;
> + cl_int cle;
> + enum AVPixelFormat pix_fmt;
> + int err, pix_fmts_found;
> + size_t max_width, max_height;
> +
> + cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH,
> + sizeof(max_width), &max_width, NULL);
> + if (cle != CL_SUCCESS) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
> + "supported image width: %d.\n", cle);
> + } else {
> + constraints->max_width = max_width;
> + }
> + cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
> + sizeof(max_height), &max_height, NULL);
> + if (cle != CL_SUCCESS) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
> + "supported image height: %d.\n", cle);
> + } else {
> + constraints->max_height = max_height;
> + }
> + av_log(hwdev, AV_LOG_DEBUG, "Maximum supported image size %dx%d.\n",
> + constraints->max_width, constraints->max_height);
> +
> + cle = clGetSupportedImageFormats(hwctx->context, 0,
> + CL_MEM_OBJECT_IMAGE2D,
> + 0, NULL, &nb_image_formats);
> + if (cle != CL_SUCCESS) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
> + "image formats: %d.\n", cle);
> + err = AVERROR(ENOSYS);
> + goto fail;
> + }
> + if (nb_image_formats == 0) {
> + av_log(hwdev, AV_LOG_ERROR, "No image support in OpenCL "
> + "driver (zero supported image formats).\n");
> + err = AVERROR(ENOSYS);
> + goto fail;
> + }
> +
> + image_formats =
> + av_malloc(nb_image_formats * sizeof(*image_formats));
> + if (!image_formats) {
> + err = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + cle = clGetSupportedImageFormats(hwctx->context, 0,
> + CL_MEM_OBJECT_IMAGE2D,
> + nb_image_formats,
> + image_formats, NULL);
> + if (cle != CL_SUCCESS) {
> + av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
> + "image formats: %d.\n", cle);
> + err = AVERROR(ENOSYS);
> + goto fail;
> + }
> +
> + pix_fmts_found = 0;
> + for (pix_fmt = 0; pix_fmt < AV_PIX_FMT_NB; pix_fmt++) {
Multiple API calls, looping over _all_ libavutil pixfmts... seems to do
a lot of work for a call you'd expect to be fast and often called.
Maybe not a problem.
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> + int plane, i;
> +
> + for (plane = 0;; plane++) {
> + err = opencl_get_plane_format(pix_fmt, plane, 0, 0,
> + &image_format,
> + &image_desc);
> + if (err < 0)
> + break;
> +
> + for (i = 0; i < nb_image_formats; i++) {
> + if (image_formats[i].image_channel_order ==
> + image_format.image_channel_order &&
> + image_formats[i].image_channel_data_type ==
> + image_format.image_channel_data_type)
> + break;
> + }
> + if (i == nb_image_formats) {
> + err = AVERROR(EINVAL);
> + break;
> + }
> + }
> + if (err != AVERROR(ENOENT))
> + continue;
> +
> + av_log(hwdev, AV_LOG_DEBUG, "Format %s supported.\n",
> + av_get_pix_fmt_name(pix_fmt));
> +
> + constraints->valid_sw_formats =
> + av_realloc_array(constraints->valid_sw_formats,
> + pix_fmts_found + 2,
> + sizeof(*constraints->valid_sw_formats));
> + if (!constraints->valid_sw_formats) {
> + err = AVERROR(ENOMEM);
> + goto fail;
> + }
> + constraints->valid_sw_formats[pix_fmts_found] = pix_fmt;
> + constraints->valid_sw_formats[pix_fmts_found + 1] =
> + AV_PIX_FMT_NONE;
> + ++pix_fmts_found;
(Don't mind me, just stylistically objecting to pre-increment.)
> + }
> +
> + av_freep(&image_formats);
> +
> + constraints->valid_hw_formats =
> + av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
> + if (!constraints->valid_hw_formats) {
> + err = AVERROR(ENOMEM);
> + goto fail;
> + }
> + constraints->valid_hw_formats[0] = AV_PIX_FMT_OPENCL;
> + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
> +
> + return 0;
> +
> +fail:
> + av_freep(&image_formats);
> + return err;
> +}
> +
> +static void opencl_buffer_free(void *opaque, uint8_t *data)
> +{
> + AVHWFramesContext *hwfc = opaque;
> + cl_int cle;
> +
> + cle = clReleaseMemObject((cl_mem)data);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to release buffer: %d.\n",
> + cle);
> + }
> +}
> +
> +static AVBufferRef *opencl_pool_alloc(void *opaque, int size)
> +{
> + AVHWFramesContext *hwfc = opaque;
> + AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> + cl_int cle;
> + cl_mem mem;
> + size_t total_size;
> + int err, plane;
> + AVBufferRef *ref;
> +
> + // We allocate a buffer covering the whole image and then make
> + // sub-buffers for each plane. This makes sure that the image is
> + // one large object so that we can share with other APIs.
> +
> + total_size = 0;
> +
> + for (plane = 0;; plane++) {
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> +
> + err = opencl_get_plane_format(hwfc->sw_format, plane,
> + hwfc->width, hwfc->height,
> + &image_format, &image_desc);
> + if (err < 0)
> + break;
> +
> + total_size += (image_desc.image_row_pitch *
> + image_desc.image_height);
> + }
> +
> + mem = clCreateBuffer(hwctx->context, CL_MEM_READ_WRITE,
> + total_size, NULL, &cle);
> + if (!mem) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to allocate buffer "
> + "(%zu bytes): %d.\n", total_size, cle);
> + return NULL;
> + }
> +
> + ref = av_buffer_create((uint8_t*)mem, sizeof(cl_mem),
> + &opencl_buffer_free, hwfc, 0);
> + if (!ref)
> + return NULL;
> +
> + return ref;
> +}
> +
> +static int opencl_frames_init(AVHWFramesContext *hwfc)
> +{
> + if (!hwfc->pool) {
> + hwfc->internal->pool_internal =
> + av_buffer_pool_init2(sizeof(cl_mem), hwfc,
> + &opencl_pool_alloc, NULL);
> + if (!hwfc->internal->pool_internal)
> + return AVERROR(ENOMEM);
> + }
> +
> + return 0;
> +}
> +
> +static int opencl_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
> +{
> + AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> + AVBufferRef *outer_ref = NULL;
> + cl_mem mem, mem_plane[MAX_PLANES], image_plane[MAX_PLANES];
> + cl_int cle;
> + size_t offset;
> + int err, plane;
> +
> + outer_ref = av_buffer_pool_get(hwfc->pool);
> + if (!outer_ref)
> + return AVERROR(ENOMEM);
> +
> + mem = (cl_mem)outer_ref->data;
> +
> + offset = 0;
> + for (plane = 0;; plane++) {
> + cl_buffer_region region;
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> +
> + err = opencl_get_plane_format(hwfc->sw_format, plane,
> + hwfc->width, hwfc->height,
> + &image_format, &image_desc);
> + if (err == AVERROR(ENOENT))
> + break;
> + if (err < 0)
> + return err;
> +
> + region.origin = offset;
> + region.size = (image_desc.image_row_pitch *
> + image_desc.image_height);
> +
> + mem_plane[plane] = clCreateSubBuffer(mem, CL_MEM_READ_WRITE,
> + CL_BUFFER_CREATE_TYPE_REGION,
> + &region, &cle);
> + if (!mem_plane[plane]) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to create sub-buffer "
> + "for plane %d: %d.\n", plane, cle);
> + return AVERROR(EIO);
> + }
> +
> + image_desc.buffer = mem_plane[plane];
> +
> + image_plane[plane] = clCreateImage(hwctx->context,
> + CL_MEM_READ_WRITE,
> + &image_format,
> + &image_desc,
> + NULL, &cle);
> + if (!image_plane[plane]) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to create image from "
> + "plane %d sub-buffer: %d.\n", plane, cle);
> + return AVERROR(EIO);
> + }
> +
> + offset += region.size;
> + frame->data[plane] = (uint8_t*)image_plane[plane];
> + frame->linesize[plane] = hwfc->width;
> +
> + frame->buf[plane] = av_buffer_create((uint8_t*)image_plane[plane],
> + sizeof(cl_mem),
> + &opencl_buffer_free,
> + hwfc, 0);
> + if (!frame->buf[plane])
> + return AVERROR(ENOMEM);
> +
> + // Unreference the sub-buffer object immediately: we don't need
> + // to access it directly and the image object is usable without
> + // an explicit reference held externally.
> + cle = clReleaseMemObject(mem_plane[plane]);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to release image "
> + "buffer: %d.\n", cle);
> + }
> + }
> +
> + frame->buf[plane] = outer_ref;
> +
> + frame->format = AV_PIX_FMT_OPENCL;
> + frame->width = hwfc->width;
> + frame->height = hwfc->height;
> +
> + return 0;
> +}
> +
> +static int opencl_transfer_get_formats(AVHWFramesContext *hwfc,
> + enum AVHWFrameTransferDirection dir,
> + enum AVPixelFormat **formats)
> +{
> + enum AVPixelFormat *fmts;
> +
> + fmts = av_malloc_array(2, sizeof(*fmts));
> + if (!fmts)
> + return AVERROR(ENOMEM);
> +
> + fmts[0] = hwfc->sw_format;
> + fmts[1] = AV_PIX_FMT_NONE;
> +
> + *formats = fmts;
> + return 0;
> +}
> +
> +static cl_command_queue opencl_get_command_queue(AVHWFramesContext *hwfc)
> +{
> + AVOpenCLFramesContext *fc = hwfc->hwctx;
> + AVOpenCLDeviceContext *dc = hwfc->device_ctx->hwctx;
> + OpenCLDeviceContext *ctx = hwfc->device_ctx->internal->priv;
> +
> + if (fc->command_queue)
> + return fc->command_queue;
> + if (dc->command_queue)
> + return dc->command_queue;
> + av_assert0(ctx->command_queue);
> + return ctx->command_queue;
> +}
> +
> +static int opencl_wait_events(AVHWFramesContext *hwfc,
> + cl_event *events, int nb_events)
> +{
> + cl_int cle;
> + int i;
> +
> + cle = clWaitForEvents(nb_events, events);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to wait for event "
> + "completion: %d.\n", cle);
> + return AVERROR(EIO);
> + }
> +
> + for (i = 0; i < nb_events; i++) {
> + cle = clReleaseEvent(events[i]);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to release "
> + "event: %d.\n", cle);
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int opencl_transfer_data_from(AVHWFramesContext *hwfc,
> + AVFrame *dst, const AVFrame *src)
> +{
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> + cl_int cle;
> + size_t origin[3] = { 0, 0, 0 };
> + size_t region[3];
> + cl_event events[MAX_PLANES];
> + int err, plane;
> +
> + if (dst->format != hwfc->sw_format)
> + return AVERROR(EINVAL);
> +
> + for (plane = 0;; plane++) {
> + err = opencl_get_plane_format(hwfc->sw_format, plane,
> + src->width, src->height,
> + &image_format, &image_desc);
> + if (err == AVERROR(ENOENT))
> + break;
> + if (err < 0)
> + return err;
> +
> + region[0] = image_desc.image_width;
> + region[1] = image_desc.image_height;
> + region[2] = 1;
> +
> + cle = clEnqueueReadImage(opencl_get_command_queue(hwfc),
> + (cl_mem)src->data[plane],
> + CL_TRUE, origin, region,
> + dst->linesize[plane], 0,
> + dst->data[plane],
> + 0, NULL, &events[plane]);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue read of "
> + "OpenCL image plane %d: %d.\n", plane, cle);
> + return AVERROR(EIO);
> + }
> + }
> +
> + return opencl_wait_events(hwfc, events, plane);
> +}
> +
> +static int opencl_transfer_data_to(AVHWFramesContext *hwfc,
> + AVFrame *dst, const AVFrame *src)
> +{
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> + cl_int cle;
> + size_t origin[3] = { 0, 0, 0 };
> + size_t region[3];
> + cl_event events[MAX_PLANES];
> + int err, plane;
> +
> + for (plane = 0;; plane++) {
> + err = opencl_get_plane_format(hwfc->sw_format, plane,
> + src->width, src->height,
> + &image_format, &image_desc);
> + if (err == AVERROR(ENOENT))
> + break;
> + if (err < 0)
> + return err;
> +
> + region[0] = image_desc.image_width;
> + region[1] = image_desc.image_height;
> + region[2] = 1;
> +
> + cle = clEnqueueWriteImage(opencl_get_command_queue(hwfc),
> + (cl_mem)dst->data[plane],
> + CL_TRUE, origin, region,
> + src->linesize[plane], 0,
> + src->data[plane],
> + 0, NULL, &events[plane]);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue write of "
> + "OpenCL image plane %d: %d.\n", plane, cle);
> + return AVERROR(EIO);
> + }
> + }
> +
> + return opencl_wait_events(hwfc, events, plane);
> +}
> +
> +typedef struct OpenCLMapping {
> + // The mapped addresses for each plane.
> + // The destination frame is not available when we unmap, so these
> + // need to be stored separately.
> + void *address[MAX_PLANES];
> +} OpenCLMapping;
> +
> +static void opencl_unmap_frame(AVHWFramesContext *hwfc,
> + HWMapDescriptor *hwmap)
> +{
> + OpenCLMapping *map = hwmap->priv;
> + cl_event events[MAX_PLANES];
> + int plane;
> + cl_int cle;
> +
> + for (plane = 0; plane < FF_ARRAY_ELEMS(map->address); plane++) {
> + if (!map->address[plane])
> + break;
> +
> + cle = clEnqueueUnmapMemObject(opencl_get_command_queue(hwfc),
> + (cl_mem)hwmap->source->data[plane],
> + map->address[plane],
> + 0, NULL, &events[plane]);
> + if (cle != CL_SUCCESS) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to unmap OpenCL "
> + "image plane %d: %d.\n", plane, cle);
> + }
> + }
> +
> + opencl_wait_events(hwfc, events, plane);
> +
> + av_free(map);
> +}
> +
> +static int opencl_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
> + const AVFrame *src, int flags)
> +{
> + cl_command_queue command_queue = opencl_get_command_queue(hwfc);
> + cl_map_flags map_flags;
> + cl_image_format image_format;
> + cl_image_desc image_desc;
> + cl_int cle;
> + OpenCLMapping *map;
> + size_t origin[3] = { 0, 0, 0 };
> + size_t region[3];
> + size_t row_pitch;
> + cl_event events[MAX_PLANES];
> + int err, plane;
> +
> + av_assert0(hwfc->sw_format == dst->format);
> +
> + if (flags & AV_HWFRAME_MAP_OVERWRITE &&
> + !(flags & AV_HWFRAME_MAP_READ)) {
> + // This is mutually exclusive with the read/write flags, so
> + // there is no way to map with read here.
> + map_flags = CL_MAP_WRITE_INVALIDATE_REGION;
> + } else {
> + map_flags = 0;
> + if (flags & AV_HWFRAME_MAP_READ)
> + map_flags |= CL_MAP_READ;
> + if (flags & AV_HWFRAME_MAP_WRITE)
> + map_flags |= CL_MAP_WRITE;
> + }
> +
> + map = av_mallocz(sizeof(*map));
> + if (!map)
> + return AVERROR(ENOMEM);
> +
> + for (plane = 0;; plane++) {
> + err = opencl_get_plane_format(hwfc->sw_format, plane,
> + src->width, src->height,
> + &image_format, &image_desc);
> + if (err == AVERROR(ENOENT))
> + break;
> + if (err < 0)
> + goto fail;
> +
> + region[0] = image_desc.image_width;
> + region[1] = image_desc.image_height;
> + region[2] = 1;
> +
> + map->address[plane] =
> + clEnqueueMapImage(command_queue,
> + (cl_mem)src->data[plane],
> + CL_TRUE, map_flags, origin, region,
> + &row_pitch, NULL, 0, NULL,
> + &events[plane], &cle);
> + if (!map->address[plane]) {
> + av_log(hwfc, AV_LOG_ERROR, "Failed to map OpenCL "
> + "image plane %d: %d.\n", plane, cle);
> + err = AVERROR(EIO);
> + goto fail;
> + }
> +
> + dst->data[plane] = map->address[plane];
> + dst->linesize[plane] = image_desc.image_row_pitch;
> +
> + av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n",
> + plane, src->data[plane], dst->data[plane]);
> + }
> +
> + err = opencl_wait_events(hwfc, events, plane);
> + if (err < 0)
> + goto fail;
> +
> + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
> + &opencl_unmap_frame, map);
> + if (err < 0)
> + goto fail;
> +
> + dst->width = src->width;
> + dst->height = src->height;
> +
> + return 0;
> +
> +fail:
> + for (plane = 0; plane < MAX_PLANES; plane++) {
> + if (!map->address[plane])
> + break;
> + clEnqueueUnmapMemObject(command_queue,
> + (cl_mem)src->data[plane],
> + map->address[plane],
> + 0, NULL, &events[plane]);
> + }
> + if (plane > 0)
> + opencl_wait_events(hwfc, events, plane);
> + av_freep(&map);
> + return err;
> +}
> +
> +static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
> + const AVFrame *src, int flags)
> +{
> + av_assert0(dst->format == AV_PIX_FMT_OPENCL);
> + switch (src->format) {
> + default:
> + return AVERROR(ENOSYS);
> + }
> +}
> +
> +static int opencl_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
> + const AVFrame *src, int flags)
> +{
> + av_assert0(src->format == AV_PIX_FMT_OPENCL);
> + switch (dst->format) {
> + default:
> + if (hwfc->sw_format != dst->format)
> + return AVERROR(ENOSYS);
> + return opencl_map_frame(hwfc, dst, src, flags);
> + }
> +}
> +
> +const HWContextType ff_hwcontext_type_opencl = {
> + .type = AV_HWDEVICE_TYPE_OPENCL,
> + .name = "OpenCL",
> +
> + .device_hwctx_size = sizeof(AVOpenCLDeviceContext),
> + .device_priv_size = sizeof(OpenCLDeviceContext),
> + .frames_hwctx_size = sizeof(AVOpenCLFramesContext),
> +
> + .device_create = &opencl_device_create,
> + .device_init = &opencl_device_init,
> + .device_uninit = &opencl_device_uninit,
> +
> + .frames_get_constraints = &opencl_frames_get_constraints,
> + .frames_init = &opencl_frames_init,
> + .frames_uninit = NULL,
> + .frames_get_buffer = &opencl_get_buffer,
> +
> + .transfer_get_formats = &opencl_transfer_get_formats,
> + .transfer_data_to = &opencl_transfer_data_to,
> + .transfer_data_from = &opencl_transfer_data_from,
> +
> + .map_to = &opencl_map_to,
> + .map_from = &opencl_map_from,
> +
> + .pix_fmts = (const enum AVPixelFormat[]) {
> + AV_PIX_FMT_OPENCL,
> + AV_PIX_FMT_NONE
> + },
> +};
> diff --git a/libavutil/hwcontext_opencl.h b/libavutil/hwcontext_opencl.h
> new file mode 100644
> index 000000000..955c1797e
> --- /dev/null
> +++ b/libavutil/hwcontext_opencl.h
> @@ -0,0 +1,78 @@
> +/*
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_HWCONTEXT_OPENCL_H
> +#define AVUTIL_HWCONTEXT_OPENCL_H
> +
> +#include <CL/cl.h>
> +
> +/**
> + * @file
> + * API-specific header for AV_HWDEVICE_TYPE_OPENCL.
> + *
> + * Pools allocated internally are always dynamic, and are primarily intended
> + * to be used in OpenCL-only cases. If interoperation is required, it is
> + * typically required to allocate frames in the other API and then map the
> + * frames context to OpenCL with av_hwframe_ctx_create_derived().
> + *
> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
> + * with the data pointer pointing at a cl_mem object which is a buffer large
> + * enough to contain all of the data of the intended frame. If the frame
> + * has multiple planes, the images are created in sub-buffers of the buffer
> + * in the pool.
> + */
> +
> +/**
> + * OpenCL device details.
> + *
> + * Allocated as AVHWDeviceContext.hwctx
> + */
> +typedef struct AVOpenCLDeviceContext {
> + cl_platform_id platform_id;
> + cl_device_id device_id;
> + /**
> + * The OpenCL context for all operations on this device.
> + */
> + cl_context context;
> + /**
> + * The default command queue for this device, which will be used by all
> + * frames contexts which do not have their own command queue. If not
> + * initialised by the user, a default queue will be created.
> + */
> + cl_command_queue command_queue;
> +} AVOpenCLDeviceContext;
> +
> +/**
> + * OpenCL-specific data associated with a frame pool.
> + *
> + * Allocated as AVHWFramesContext.hwctx.
> + */
> +typedef struct AVOpenCLFramesContext {
> + /**
> + * The command queue used for internal asynchronous operations on this
> + * device (av_hwframe_transfer_data(), av_hwframe_map()). Note that
> + * currently these are made synchronous by calling clFinish() at the
> + * end of every operation sequence.
> + *
> + * If this is not set, the command queue from the associated device is
> + * used instead.
> + */
> + cl_command_queue command_queue;
> +} AVOpenCLFramesContext;
> +
> +#endif /* AVUTIL_HWCONTEXT_OPENCL_H */
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 0f2b684fa..7c0e85249 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -54,7 +54,7 @@
> */
>
> #define LIBAVUTIL_VERSION_MAJOR 55
> -#define LIBAVUTIL_VERSION_MINOR 33
> +#define LIBAVUTIL_VERSION_MINOR 34
> #define LIBAVUTIL_VERSION_MICRO 0
>
> #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
Can't comment much on the actual code here.
Isn't it a bit strange that we also have hwcontext_cuda, and they don't
interact at all?
Those complex command queue operations might need a lock around them to
make them thread-safe. (I sure as hell don't want to need fragile
external locking around this API just because I might do
decoding/filtering/rendering on different threads to some degree.)
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel