On Sun, 19 Feb 2017 18:46:40 +0000
Mark Thompson <[email protected]> wrote:

> ---
>  configure                      |   3 +
>  doc/APIchanges                 |   4 +
>  libavutil/Makefile             |   2 +
>  libavutil/hwcontext.c          |   3 +
>  libavutil/hwcontext.h          |   1 +
>  libavutil/hwcontext_internal.h |   1 +
>  libavutil/hwcontext_opencl.c   | 983 
> +++++++++++++++++++++++++++++++++++++++++
>  libavutil/hwcontext_opencl.h   |  78 ++++
>  libavutil/version.h            |   2 +-
>  9 files changed, 1076 insertions(+), 1 deletion(-)
>  create mode 100644 libavutil/hwcontext_opencl.c
>  create mode 100644 libavutil/hwcontext_opencl.h
> 
> diff --git a/configure b/configure
> index 4635b73b0..5b78131ab 100755
> --- a/configure
> +++ b/configure
> @@ -244,6 +244,7 @@ External library support:
>    --enable-nvenc   Nvidia video encoding
>    --enable-omx     OpenMAX IL
>    --enable-omx-rpi OpenMAX IL for Raspberry Pi
> +  --enable-opencl  OpenCL processing
>    --enable-vaapi   Video Acceleration API (mainly Unix/Intel)
>    --enable-vda     Apple Video Decode Acceleration [auto]
>    --enable-vdpau   Nvidia Video Decode and Presentation API for Unix [auto]
> @@ -1267,6 +1268,7 @@ HWACCEL_LIBRARY_LIST="
>      mmal
>      nvenc
>      omx
> +    opencl
>      vaapi
>      vda
>      vdpau
> @@ -4733,6 +4735,7 @@ enabled omx_rpi           && { check_header OMX_Core.h 
> ||
>                                 { ! enabled cross_compile && add_cflags 
> -isystem/opt/vc/include/IL && check_header OMX_Core.h ; } ||
>                                 die "ERROR: OpenMAX IL headers not found"; }
>  enabled omx               && require_header OMX_Core.h
> +enabled opencl            && require OpenCL CL/cl.h clGetPlatformIDs -lOpenCL
>  enabled openssl           && { { check_pkg_config openssl openssl/ssl.h 
> OPENSSL_init_ssl ||
>                                   check_pkg_config openssl openssl/ssl.h 
> SSL_library_init; } && {
>                                 add_cflags $openssl_cflags && add_extralibs 
> $openssl_extralibs; } ||
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 253454358..208ddd318 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -13,6 +13,10 @@ libavutil:     2015-08-28
>  
>  API changes, most recent first:
>  
> +2017-xx-xx - xxxxxxx - lavu 55.34.0 - hwcontext.h hwcontext_opencl.h
> +  Add AV_HWDEVICE_TYPE_OPENCL and a new installed header with
> +  OpenCL-specific hwcontext definitions.
> +
>  2017-xx-xx - xxxxxxx - lavu 55.33.0 - pixfmt.h
>    Add AV_PIX_FMT_OPENCL.
>  
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 60e180c79..49e84854a 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -115,6 +115,7 @@ OBJS-$(CONFIG_CUDA)                     += 
> hwcontext_cuda.o
>  OBJS-$(CONFIG_DXVA2)                    += hwcontext_dxva2.o
>  OBJS-$(CONFIG_LIBMFX)                   += hwcontext_qsv.o
>  OBJS-$(CONFIG_LZO)                      += lzo.o
> +OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
>  OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
>  OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
>  
> @@ -123,6 +124,7 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
>  SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda.h
>  SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
>  SKIPHEADERS-$(CONFIG_LIBMFX)           += hwcontext_qsv.h
> +SKIPHEADERS-$(CONFIG_OPENCL)           += hwcontext_opencl.h
>  SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
>  SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
>  
> diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
> index e4087a15c..27aab3086 100644
> --- a/libavutil/hwcontext.c
> +++ b/libavutil/hwcontext.c
> @@ -45,6 +45,9 @@ static const HWContextType * const hw_table[] = {
>  #if CONFIG_VDPAU
>      &ff_hwcontext_type_vdpau,
>  #endif
> +#if CONFIG_OPENCL
> +    &ff_hwcontext_type_opencl,
> +#endif
>      NULL,
>  };
>  
> diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
> index b81a833b2..752c09e6f 100644
> --- a/libavutil/hwcontext.h
> +++ b/libavutil/hwcontext.h
> @@ -30,6 +30,7 @@ enum AVHWDeviceType {
>      AV_HWDEVICE_TYPE_VAAPI,
>      AV_HWDEVICE_TYPE_DXVA2,
>      AV_HWDEVICE_TYPE_QSV,
> +    AV_HWDEVICE_TYPE_OPENCL,
>  };
>  
>  typedef struct AVHWDeviceInternal AVHWDeviceInternal;
> diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
> index 1ac0f1f95..ecda70558 100644
> --- a/libavutil/hwcontext_internal.h
> +++ b/libavutil/hwcontext_internal.h
> @@ -159,6 +159,7 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref,
>  
>  extern const HWContextType ff_hwcontext_type_cuda;
>  extern const HWContextType ff_hwcontext_type_dxva2;
> +extern const HWContextType ff_hwcontext_type_opencl;
>  extern const HWContextType ff_hwcontext_type_qsv;
>  extern const HWContextType ff_hwcontext_type_vaapi;
>  extern const HWContextType ff_hwcontext_type_vdpau;
> diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
> new file mode 100644
> index 000000000..0b108b7fc
> --- /dev/null
> +++ b/libavutil/hwcontext_opencl.c
> @@ -0,0 +1,983 @@
> +/*
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#include <string.h>
> +
> +#include "config.h"
> +
> +#include "avassert.h"
> +#include "avstring.h"
> +#include "common.h"
> +#include "hwcontext.h"
> +#include "hwcontext_internal.h"
> +#include "hwcontext_opencl.h"
> +#include "mem.h"
> +#include "pixdesc.h"
> +
> +// The maximum number of planes in an image.  This must be strictly
> +// less than AV_NUM_DATA_POINTERS because we place the whole-frame
> +// reference in a buffer entry after the final plane.  For now, four
> +// is sufficient for any format we can sensibly support.
> +#define MAX_PLANES 4
> +
> +
> +typedef struct OpenCLDeviceContext {
> +    // Internal command queue used for transfer/mapping operations
> +    // if the user does not supply one themselves.
> +    int internal_command_queue;
> +    cl_command_queue command_queue;
> +
> +    // Platform/device-specific functions.
> +} OpenCLDeviceContext;
> +
> +static void opencl_error_callback(const char *errinfo,
> +                                  const void *private_info, size_t cb,
> +                                  void *user_data)
> +{
> +    AVHWDeviceContext *ctx = user_data;
> +    av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo);
> +}
> +
> +static void opencl_device_free(AVHWDeviceContext *ctx)
> +{
> +    AVOpenCLDeviceContext *hwctx = ctx->hwctx;
> +    cl_int cle;
> +
> +    cle = clReleaseContext(hwctx->context);
> +    if (cle != CL_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to release OpenCL "
> +               "context: %d.\n", cle);
> +    }
> +}
> +
> +static struct {
> +    const char *key;
> +    cl_platform_info name;
> +} opencl_platform_params[] = {
> +    { "platform_version",    CL_PLATFORM_VERSION    },
> +    { "platform_name",       CL_PLATFORM_NAME       },
> +    { "platform_vendor",     CL_PLATFORM_VENDOR     },
> +    { "platform_extensions", CL_PLATFORM_EXTENSIONS },
> +};
> +
> +static struct {
> +    const char *key;
> +    cl_device_info name;
> +} opencl_device_params[] = {
> +    { "device_name",         CL_DEVICE_NAME         },
> +    { "device_vendor",       CL_DEVICE_VENDOR       },
> +    { "device_version",      CL_DEVICE_VERSION      },
> +    { "device_extensions",   CL_DEVICE_EXTENSIONS   },
> +    { "driver_version",      CL_DRIVER_VERSION      },
> +};
> +
> +static struct {
> +    const char *key;
> +    cl_device_type type;
> +} opencl_device_types[] = {
> +    { "cpu",         CL_DEVICE_TYPE_CPU         },
> +    { "gpu",         CL_DEVICE_TYPE_GPU         },
> +    { "accelerator", CL_DEVICE_TYPE_ACCELERATOR },
> +    { "custom",      CL_DEVICE_TYPE_CUSTOM      },
> +    { "all",         CL_DEVICE_TYPE_ALL         },
> +};
> +
> +static int opencl_device_create_internal(AVHWDeviceContext *ctx,
> +                                         const char *device,
> +                                         AVDictionary *opts, int flags,
> +                                         cl_context_properties *props)
> +{
> +    cl_uint      nb_platforms;
> +    cl_platform_id *platforms = NULL;
> +    cl_uint      nb_devices;
> +    cl_device_id   *devices = NULL;
> +    cl_device_type  device_type;
> +    AVOpenCLDeviceContext *hwctx = ctx->hwctx;
> +    cl_int cle;
> +    const AVDictionaryEntry *param;
> +    char tmp[2048];
> +    size_t size;
> +    int ret, found, p, d, i;
> +
> +    cle = clGetPlatformIDs(0, NULL, &nb_platforms);
> +    if (cle != CL_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to get number of "
> +               "OpenCL platforms: %d.\n", cle);
> +        ret = AVERROR(ENOSYS);
> +        goto fail;
> +    }
> +    av_log(ctx, AV_LOG_VERBOSE, "%d OpenCL platforms found.\n",
> +           nb_platforms);
> +
> +    platforms = av_malloc_array(nb_platforms, sizeof(*platforms));
> +    if (!platforms) {
> +        ret = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    cle = clGetPlatformIDs(nb_platforms, platforms, NULL);
> +    if (cle != CL_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to get list of OpenCL "
> +               "platforms: %d.\n", cle);
> +        ret = AVERROR(ENOSYS);
> +        goto fail;
> +    }
> +
> +    param = av_dict_get(opts, "device_type", NULL, 0);
> +    if (param) {
> +        device_type = 0;
> +        for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_types); i++) {
> +            if (!strcmp(opencl_device_types[i].key, param->value)) {
> +                device_type = opencl_device_types[i].type;
> +                break;
> +            }
> +        }
> +        if (!device_type) {
> +            av_log(ctx, AV_LOG_ERROR, "Unknown device type %s.\n",
> +                   param->value);
> +            ret = AVERROR(EINVAL);
> +            goto fail;
> +        }
> +    } else {
> +        device_type = CL_DEVICE_TYPE_DEFAULT;
> +    }
> +
> +    found = 0;
> +    for (p = 0; p < nb_platforms; p++) {
> +        int match = 1;
> +        for (i = 0; i < FF_ARRAY_ELEMS(opencl_platform_params); i++) {
> +            param = av_dict_get(opts, opencl_platform_params[i].key,
> +                                NULL, 0);
> +            if (!param)
> +                continue;
> +            cle = clGetPlatformInfo(platforms[p],
> +                                    opencl_platform_params[i].name,
> +                                    sizeof(tmp), tmp, &size);
> +            if (!av_stristr(tmp, param->value))
> +                match = 0;
> +        }
> +        if (!match)
> +            continue;
> +
> +        cle = clGetDeviceIDs(platforms[p], device_type,
> +                             0, NULL, &nb_devices);
> +        if (cle == CL_DEVICE_NOT_FOUND) {
> +            av_log(ctx, AV_LOG_VERBOSE, "No devices of type %lu found "
> +                   "on platform %d.\n", (unsigned long)device_type, cle);
> +            ret = AVERROR(ENOSYS);
> +            goto fail;
> +        } else if (cle != CL_SUCCESS) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to get number of "
> +                   "devices on platform %d: %d.\n", p, cle);
> +            ret = AVERROR(ENOSYS);
> +            goto fail;
> +        }
> +        av_log(ctx, AV_LOG_VERBOSE, "%d OpenCL devices found on "
> +               "platform %d.\n", nb_devices, p);
> +
> +        devices = av_malloc_array(nb_devices, sizeof(*devices));
> +        if (!devices) {
> +            ret = AVERROR(ENOMEM);
> +            goto fail;
> +        }
> +
> +        cle = clGetDeviceIDs(platforms[p], device_type,
> +                             nb_devices, devices, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to get list of devices "
> +                   "on platform %d: %d.\n", p, cle);
> +            ret = AVERROR(ENODEV);
> +            goto fail;
> +        }
> +
> +        for (d = 0; d < nb_devices; d++) {
> +            match = 1;
> +            for (i = 0; i < FF_ARRAY_ELEMS(opencl_device_params); i++) {
> +                param = av_dict_get(opts, opencl_device_params[i].key,
> +                                    NULL, 0);
> +                if (!param)
> +                    continue;
> +                cle = clGetDeviceInfo(devices[d],
> +                                      opencl_device_params[i].name,
> +                                      sizeof(tmp), tmp, &size);
> +                if (!av_stristr(tmp, param->value))
> +                    match = 0;
> +            }
> +            if (!match)
> +                continue;
> +
> +            av_log(ctx, AV_LOG_VERBOSE, "Matched platform %d "
> +                   "device %d.\n", p, d);
> +            ++found;
> +            memcpy(&hwctx->platform_id, platforms + p, sizeof(*platforms));
> +            memcpy(&hwctx->device_id,   devices   + d, sizeof(*devices));
> +        }
> +
> +        av_freep(&devices);
> +    }
> +
> +    if (found == 0) {
> +        av_log(ctx, AV_LOG_ERROR, "No matching devices found.\n");
> +        ret = AVERROR(ENODEV);
> +        goto fail;
> +    }
> +    if (found > 1) {
> +        av_log(ctx, AV_LOG_ERROR, "More than one matching device found.\n");
> +        ret = AVERROR(ENODEV);
> +        goto fail;
> +    }
> +
> +    hwctx->context = clCreateContext(props, 1, &hwctx->device_id,
> +                                     &opencl_error_callback, ctx,
> +                                     &cle);
> +    if (!hwctx->context) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create OpenCL context: "
> +               "%d.\n", cle);
> +        ret = AVERROR(ENODEV);
> +        goto fail;
> +    }
> +
> +    ctx->free = &opencl_device_free;
> +
> +    ret = 0;
> +fail:
> +    av_freep(&platforms);
> +    av_freep(&devices);
> +    return ret;
> +}
> +
> +static int opencl_device_init(AVHWDeviceContext *hwdev)
> +{
> +    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
> +    OpenCLDeviceContext     *ctx = hwdev->internal->priv;
> +    cl_int cle;
> +
> +    if (!hwctx->command_queue) {
> +        ctx->internal_command_queue = 1;
> +        ctx->command_queue = clCreateCommandQueue(hwctx->context,
> +                                                  hwctx->device_id,
> +                                                  0, &cle);
> +        if (!ctx->command_queue) {
> +            av_log(hwdev, AV_LOG_ERROR, "Failed to create internal "
> +                   "command queue: %d.\n", cle);
> +            return AVERROR(EIO);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int opencl_device_create(AVHWDeviceContext *ctx, const char *device,
> +                                AVDictionary *opts, int flags)
> +{
> +    return opencl_device_create_internal(ctx, device, opts, flags, NULL);
> +}
> +
> +static void opencl_device_uninit(AVHWDeviceContext *hwdev)
> +{
> +    OpenCLDeviceContext *ctx = hwdev->internal->priv;
> +    cl_int cle;
> +
> +    if (ctx->internal_command_queue) {
> +        cle = clReleaseCommandQueue(ctx->command_queue);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwdev, AV_LOG_ERROR, "Failed to release internal "
> +                   "command queue: %d.\n", cle);
> +        }
> +    }
> +}
> +
> +static int opencl_get_plane_format(enum AVPixelFormat pixfmt,
> +                                   int plane, int width, int height,
> +                                   cl_image_format *image_format,
> +                                   cl_image_desc *image_desc)
> +{
> +    av_assert0(image_format && image_desc);
> +
> +    memset(image_format, 0, sizeof(*image_format));
> +    memset(image_desc,   0, sizeof(*image_desc));
> +    image_desc->image_type = CL_MEM_OBJECT_IMAGE2D;
> +
> +    switch (pixfmt) {
> +    case AV_PIX_FMT_YUV420P:
> +        if (width % 2)
> +            return AVERROR(EINVAL);
> +        if (plane > 2)
> +            return AVERROR(ENOENT);
> +        image_format->image_channel_order     = CL_R;
> +        image_format->image_channel_data_type = CL_UNORM_INT8;
> +        image_desc->image_width     = width  / (1 + (plane > 0));
> +        image_desc->image_height    = height / (1 + (plane > 0));
> +        image_desc->image_row_pitch = image_desc->image_width;
> +        break;
> +
> +    case AV_PIX_FMT_NV12:
> +        if (width % 2)
> +            return AVERROR(EINVAL);
> +        if (plane > 1)
> +            return AVERROR(ENOENT);
> +        image_format->image_channel_order     = plane ? CL_RG : CL_R;
> +        image_format->image_channel_data_type = CL_UNORM_INT8;
> +        image_desc->image_width     = width  / (1 + plane);
> +        image_desc->image_height    = height / (1 + plane);
> +        image_desc->image_row_pitch = width;
> +        break;
> +
> +    case AV_PIX_FMT_P010:
> +        if (width % 2)
> +            return AVERROR(EINVAL);
> +        if (plane > 1)
> +            return AVERROR(ENOENT);
> +        image_format->image_channel_order     = plane ? CL_RG : CL_R;
> +        image_format->image_channel_data_type = CL_UNORM_INT16;
> +        image_desc->image_width     = width  / (1 + plane);
> +        image_desc->image_height    = height / (1 + plane);
> +        image_desc->image_row_pitch = 2 * width;
> +        break;
> +
> +    case AV_PIX_FMT_RGBA:
> +        image_format->image_channel_order = CL_RGBA;
> +        if (0)
> +    case AV_PIX_FMT_BGRA:
> +        image_format->image_channel_order = CL_BGRA;
> +        if (0)
> +    case AV_PIX_FMT_ARGB:
> +        image_format->image_channel_order = CL_ARGB;
> +#ifdef CL_ABGR
> +        if (0)

wut

> +    case AV_PIX_FMT_ABGR:
> +        image_format->image_channel_order = CL_ABGR;
> +#endif
> +        if (plane > 0)
> +            return AVERROR(ENOENT);
> +        image_format->image_channel_data_type = CL_UNORM_INT8;
> +        image_desc->image_width     = width;
> +        image_desc->image_height    = height;
> +        image_desc->image_row_pitch = 4 * width;
> +        break;
> +
> +    default:
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static int opencl_frames_get_constraints(AVHWDeviceContext *hwdev,
> +                                         const void *hwconfig,
> +                                         AVHWFramesConstraints *constraints)
> +{
> +    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
> +    cl_uint nb_image_formats;
> +    cl_image_format *image_formats = NULL;
> +    cl_int cle;
> +    enum AVPixelFormat pix_fmt;
> +    int err, pix_fmts_found;
> +    size_t max_width, max_height;
> +
> +    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH,
> +                          sizeof(max_width), &max_width, NULL);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
> +               "supported image width: %d.\n", cle);
> +    } else {
> +        constraints->max_width = max_width;
> +    }
> +    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
> +                          sizeof(max_height), &max_height, NULL);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
> +               "supported image height: %d.\n", cle);
> +    } else {
> +        constraints->max_height = max_height;
> +    }
> +    av_log(hwdev, AV_LOG_DEBUG, "Maximum supported image size %dx%d.\n",
> +           constraints->max_width, constraints->max_height);
> +
> +    cle = clGetSupportedImageFormats(hwctx->context, 0,
> +                                     CL_MEM_OBJECT_IMAGE2D,
> +                                     0, NULL, &nb_image_formats);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
> +               "image formats: %d.\n", cle);
> +        err = AVERROR(ENOSYS);
> +        goto fail;
> +    }
> +    if (nb_image_formats == 0) {
> +        av_log(hwdev, AV_LOG_ERROR, "No image support in OpenCL "
> +               "driver (zero supported image formats).\n");
> +        err = AVERROR(ENOSYS);
> +        goto fail;
> +    }
> +
> +    image_formats =
> +        av_malloc(nb_image_formats * sizeof(*image_formats));
> +    if (!image_formats) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    cle = clGetSupportedImageFormats(hwctx->context, 0,
> +                                     CL_MEM_OBJECT_IMAGE2D,
> +                                     nb_image_formats,
> +                                     image_formats, NULL);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
> +               "image formats: %d.\n", cle);
> +        err = AVERROR(ENOSYS);
> +        goto fail;
> +    }
> +
> +    pix_fmts_found = 0;
> +    for (pix_fmt = 0; pix_fmt < AV_PIX_FMT_NB; pix_fmt++) {

Multiple API calls, looping over _all_ libavutil pixfmts... seems to do
a lot of work for a call you'd expect to be fast and often called.
Maybe not a problem.

> +        cl_image_format image_format;
> +        cl_image_desc   image_desc;
> +        int plane, i;
> +
> +        for (plane = 0;; plane++) {
> +            err = opencl_get_plane_format(pix_fmt, plane, 0, 0,
> +                                          &image_format,
> +                                          &image_desc);
> +            if (err < 0)
> +                break;
> +
> +            for (i = 0; i < nb_image_formats; i++) {
> +                if (image_formats[i].image_channel_order ==
> +                    image_format.image_channel_order &&
> +                    image_formats[i].image_channel_data_type ==
> +                    image_format.image_channel_data_type)
> +                    break;
> +            }
> +            if (i == nb_image_formats) {
> +                err = AVERROR(EINVAL);
> +                break;
> +            }
> +        }
> +        if (err != AVERROR(ENOENT))
> +            continue;
> +
> +        av_log(hwdev, AV_LOG_DEBUG, "Format %s supported.\n",
> +               av_get_pix_fmt_name(pix_fmt));
> +
> +        constraints->valid_sw_formats =
> +            av_realloc_array(constraints->valid_sw_formats,
> +                             pix_fmts_found + 2,
> +                             sizeof(*constraints->valid_sw_formats));
> +        if (!constraints->valid_sw_formats) {
> +            err = AVERROR(ENOMEM);
> +            goto fail;
> +        }
> +        constraints->valid_sw_formats[pix_fmts_found] = pix_fmt;
> +        constraints->valid_sw_formats[pix_fmts_found + 1] =
> +            AV_PIX_FMT_NONE;
> +        ++pix_fmts_found;

(Don't mind me, just stylistically objecting to pre-increment.)

> +    }
> +
> +    av_freep(&image_formats);
> +
> +    constraints->valid_hw_formats =
> +        av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
> +    if (!constraints->valid_hw_formats) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +    constraints->valid_hw_formats[0] = AV_PIX_FMT_OPENCL;
> +    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
> +
> +    return 0;
> +
> +fail:
> +    av_freep(&image_formats);
> +    return err;
> +}
> +
> +static void opencl_buffer_free(void *opaque, uint8_t *data)
> +{
> +    AVHWFramesContext *hwfc = opaque;
> +    cl_int cle;
> +
> +    cle = clReleaseMemObject((cl_mem)data);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwfc, AV_LOG_ERROR, "Failed to release buffer: %d.\n",
> +               cle);
> +    }
> +}
> +
> +static AVBufferRef *opencl_pool_alloc(void *opaque, int size)
> +{
> +    AVHWFramesContext      *hwfc = opaque;
> +    AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> +    cl_int cle;
> +    cl_mem mem;
> +    size_t total_size;
> +    int err, plane;
> +    AVBufferRef *ref;
> +
> +    // We allocate a buffer covering the whole image and then make
> +    // sub-buffers for each plane.  This makes sure that the image is
> +    // one large object so that we can share with other APIs.
> +
> +    total_size = 0;
> +
> +    for (plane = 0;; plane++) {
> +        cl_image_format image_format;
> +        cl_image_desc   image_desc;
> +
> +        err = opencl_get_plane_format(hwfc->sw_format, plane,
> +                                      hwfc->width, hwfc->height,
> +                                      &image_format, &image_desc);
> +        if (err < 0)
> +            break;
> +
> +        total_size += (image_desc.image_row_pitch *
> +                       image_desc.image_height);
> +    }
> +
> +    mem = clCreateBuffer(hwctx->context, CL_MEM_READ_WRITE,
> +                         total_size, NULL, &cle);
> +    if (!mem) {
> +        av_log(hwfc, AV_LOG_ERROR, "Failed to allocate buffer "
> +               "(%zu bytes): %d.\n", total_size, cle);
> +        return NULL;
> +    }
> +
> +    ref = av_buffer_create((uint8_t*)mem, sizeof(cl_mem),
> +                           &opencl_buffer_free, hwfc, 0);
> +    if (!ref)
> +        return NULL;
> +
> +    return ref;
> +}
> +
> +static int opencl_frames_init(AVHWFramesContext *hwfc)
> +{
> +    if (!hwfc->pool) {
> +        hwfc->internal->pool_internal =
> +            av_buffer_pool_init2(sizeof(cl_mem), hwfc,
> +                                 &opencl_pool_alloc, NULL);
> +        if (!hwfc->internal->pool_internal)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +static int opencl_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
> +{
> +    AVOpenCLDeviceContext *hwctx = hwfc->device_ctx->hwctx;
> +    AVBufferRef *outer_ref = NULL;
> +    cl_mem mem, mem_plane[MAX_PLANES], image_plane[MAX_PLANES];
> +    cl_int cle;
> +    size_t offset;
> +    int err, plane;
> +
> +    outer_ref = av_buffer_pool_get(hwfc->pool);
> +    if (!outer_ref)
> +        return AVERROR(ENOMEM);
> +
> +    mem = (cl_mem)outer_ref->data;
> +
> +    offset = 0;
> +    for (plane = 0;; plane++) {
> +        cl_buffer_region region;
> +        cl_image_format  image_format;
> +        cl_image_desc    image_desc;
> +
> +        err = opencl_get_plane_format(hwfc->sw_format, plane,
> +                                      hwfc->width, hwfc->height,
> +                                      &image_format, &image_desc);
> +        if (err == AVERROR(ENOENT))
> +            break;
> +        if (err < 0)
> +            return err;
> +
> +        region.origin = offset;
> +        region.size   = (image_desc.image_row_pitch *
> +                         image_desc.image_height);
> +
> +        mem_plane[plane] = clCreateSubBuffer(mem, CL_MEM_READ_WRITE,
> +                                             CL_BUFFER_CREATE_TYPE_REGION,
> +                                             &region, &cle);
> +        if (!mem_plane[plane]) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to create sub-buffer "
> +                   "for plane %d: %d.\n", plane, cle);
> +            return AVERROR(EIO);
> +        }
> +
> +        image_desc.buffer = mem_plane[plane];
> +
> +        image_plane[plane] = clCreateImage(hwctx->context,
> +                                           CL_MEM_READ_WRITE,
> +                                           &image_format,
> +                                           &image_desc,
> +                                           NULL, &cle);
> +        if (!image_plane[plane]) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to create image from "
> +                   "plane %d sub-buffer: %d.\n", plane, cle);
> +            return AVERROR(EIO);
> +        }
> +
> +        offset += region.size;
> +        frame->data[plane] = (uint8_t*)image_plane[plane];
> +        frame->linesize[plane] = hwfc->width;
> +
> +        frame->buf[plane] = av_buffer_create((uint8_t*)image_plane[plane],
> +                                             sizeof(cl_mem),
> +                                             &opencl_buffer_free,
> +                                             hwfc, 0);
> +        if (!frame->buf[plane])
> +            return AVERROR(ENOMEM);
> +
> +        // Unreference the sub-buffer object immediately: we don't need
> +        // to access it directly and the image object is usable without
> +        // an explicit reference held externally.
> +        cle = clReleaseMemObject(mem_plane[plane]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to release image "
> +                   "buffer: %d.\n", cle);
> +        }
> +    }
> +
> +    frame->buf[plane] = outer_ref;
> +
> +    frame->format  = AV_PIX_FMT_OPENCL;
> +    frame->width   = hwfc->width;
> +    frame->height  = hwfc->height;
> +
> +    return 0;
> +}
> +
> +static int opencl_transfer_get_formats(AVHWFramesContext *hwfc,
> +                                       enum AVHWFrameTransferDirection dir,
> +                                       enum AVPixelFormat **formats)
> +{
> +    enum AVPixelFormat *fmts;
> +
> +    fmts = av_malloc_array(2, sizeof(*fmts));
> +    if (!fmts)
> +        return AVERROR(ENOMEM);
> +
> +    fmts[0] = hwfc->sw_format;
> +    fmts[1] = AV_PIX_FMT_NONE;
> +
> +    *formats = fmts;
> +    return 0;
> +}
> +
> +static cl_command_queue opencl_get_command_queue(AVHWFramesContext *hwfc)
> +{
> +    AVOpenCLFramesContext *fc = hwfc->hwctx;
> +    AVOpenCLDeviceContext *dc = hwfc->device_ctx->hwctx;
> +    OpenCLDeviceContext  *ctx = hwfc->device_ctx->internal->priv;
> +
> +    if (fc->command_queue)
> +        return fc->command_queue;
> +    if (dc->command_queue)
> +        return dc->command_queue;
> +    av_assert0(ctx->command_queue);
> +    return ctx->command_queue;
> +}
> +
> +static int opencl_wait_events(AVHWFramesContext *hwfc,
> +                              cl_event *events, int nb_events)
> +{
> +    cl_int cle;
> +    int i;
> +
> +    cle = clWaitForEvents(nb_events, events);
> +    if (cle != CL_SUCCESS) {
> +        av_log(hwfc, AV_LOG_ERROR, "Failed to wait for event "
> +               "completion: %d.\n", cle);
> +        return AVERROR(EIO);
> +    }
> +
> +    for (i = 0; i < nb_events; i++) {
> +        cle = clReleaseEvent(events[i]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to release "
> +                   "event: %d.\n", cle);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * Download an OpenCL frame into a software frame.
> + *
> + * Each plane of src (a cl_mem image per plane in src->data[]) is read
> + * into the matching plane of dst using a blocking clEnqueueReadImage().
> + *
> + * @param hwfc  Frames context the source frame belongs to.
> + * @param dst   Destination software frame; its format must equal
> + *              hwfc->sw_format.
> + * @param src   Source OpenCL frame.
> + * @return 0 on success, AVERROR(EINVAL) on a format mismatch,
> + *         AVERROR(EIO) if a read could not be enqueued or waited for,
> + *         or another negative error code from opencl_get_plane_format().
> + */
> +static int opencl_transfer_data_from(AVHWFramesContext *hwfc,
> +                                     AVFrame *dst, const AVFrame *src)
> +{
> +    cl_image_format image_format;
> +    cl_image_desc image_desc;
> +    cl_int cle;
> +    size_t origin[3] = { 0, 0, 0 };
> +    size_t region[3];
> +    cl_event events[MAX_PLANES];
> +    int err, plane;
> +
> +    if (dst->format != hwfc->sw_format)
> +        return AVERROR(EINVAL);
> +
> +    // opencl_get_plane_format() signals "no more planes" with ENOENT,
> +    // which is what terminates this loop.
> +    for (plane = 0;; plane++) {
> +        err = opencl_get_plane_format(hwfc->sw_format, plane,
> +                                      src->width, src->height,
> +                                      &image_format, &image_desc);
> +        if (err == AVERROR(ENOENT))
> +            break;
> +        if (err < 0)
> +            goto fail;
> +
> +        region[0] = image_desc.image_width;
> +        region[1] = image_desc.image_height;
> +        region[2] = 1;
> +
> +        cle = clEnqueueReadImage(opencl_get_command_queue(hwfc),
> +                                 (cl_mem)src->data[plane],
> +                                 CL_TRUE, origin, region,
> +                                 dst->linesize[plane], 0,
> +                                 dst->data[plane],
> +                                 0, NULL, &events[plane]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue read of "
> +                   "OpenCL image plane %d: %d.\n", plane, cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +    }
> +
> +    return opencl_wait_events(hwfc, events, plane);
> +
> +fail:
> +    // Planes before the failing one already produced events; wait for and
> +    // release them so they are not leaked.  The reads were blocking, so
> +    // this does not stall.
> +    if (plane > 0)
> +        opencl_wait_events(hwfc, events, plane);
> +    return err;
> +}
> +
> +/**
> + * Upload a software frame into an OpenCL frame.
> + *
> + * Each plane of src is written into the matching plane of dst (a cl_mem
> + * image per plane in dst->data[]) using a blocking clEnqueueWriteImage().
> + *
> + * @param hwfc  Frames context the destination frame belongs to.
> + * @param dst   Destination OpenCL frame.
> + * @param src   Source software frame; its format must equal
> + *              hwfc->sw_format.
> + * @return 0 on success, AVERROR(EINVAL) on a format mismatch,
> + *         AVERROR(EIO) if a write could not be enqueued or waited for,
> + *         or another negative error code from opencl_get_plane_format().
> + */
> +static int opencl_transfer_data_to(AVHWFramesContext *hwfc,
> +                                   AVFrame *dst, const AVFrame *src)
> +{
> +    cl_image_format image_format;
> +    cl_image_desc image_desc;
> +    cl_int cle;
> +    size_t origin[3] = { 0, 0, 0 };
> +    size_t region[3];
> +    cl_event events[MAX_PLANES];
> +    int err, plane;
> +
> +    // Same check as the download direction: the plane layout used below
> +    // is derived from sw_format, so the source data must match it.
> +    if (src->format != hwfc->sw_format)
> +        return AVERROR(EINVAL);
> +
> +    // opencl_get_plane_format() signals "no more planes" with ENOENT,
> +    // which is what terminates this loop.
> +    for (plane = 0;; plane++) {
> +        err = opencl_get_plane_format(hwfc->sw_format, plane,
> +                                      src->width, src->height,
> +                                      &image_format, &image_desc);
> +        if (err == AVERROR(ENOENT))
> +            break;
> +        if (err < 0)
> +            goto fail;
> +
> +        region[0] = image_desc.image_width;
> +        region[1] = image_desc.image_height;
> +        region[2] = 1;
> +
> +        cle = clEnqueueWriteImage(opencl_get_command_queue(hwfc),
> +                                  (cl_mem)dst->data[plane],
> +                                  CL_TRUE, origin, region,
> +                                  src->linesize[plane], 0,
> +                                  src->data[plane],
> +                                  0, NULL, &events[plane]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to enqueue write of "
> +                   "OpenCL image plane %d: %d.\n", plane, cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +    }
> +
> +    return opencl_wait_events(hwfc, events, plane);
> +
> +fail:
> +    // Planes before the failing one already produced events; wait for and
> +    // release them so they are not leaked.  The writes were blocking, so
> +    // this does not stall.
> +    if (plane > 0)
> +        opencl_wait_events(hwfc, events, plane);
> +    return err;
> +}
> +
> +typedef struct OpenCLMapping {
> +    // The mapped addresses for each plane, as returned by
> +    // clEnqueueMapImage().  The structure is zero-allocated, so entries
> +    // for planes which were never mapped stay NULL; the unmap code stops
> +    // at the first NULL entry.
> +    // The destination frame is not available when we unmap, so these
> +    // need to be stored separately.
> +    void *address[MAX_PLANES];
> +} OpenCLMapping;
> +
> +/**
> + * Unmap callback for frames mapped by opencl_map_frame().
> + *
> + * Enqueues an unmap for every plane address recorded in the mapping,
> + * waits for the unmaps which were successfully enqueued, and frees the
> + * mapping structure.
> + *
> + * @param hwfc   Frames context of the mapped (source) frame.
> + * @param hwmap  Map descriptor; hwmap->priv is the OpenCLMapping and
> + *               hwmap->source holds the cl_mem objects in data[].
> + */
> +static void opencl_unmap_frame(AVHWFramesContext *hwfc,
> +                               HWMapDescriptor *hwmap)
> +{
> +    OpenCLMapping *map = hwmap->priv;
> +    cl_event events[MAX_PLANES];
> +    int plane, nb_events = 0;
> +    cl_int cle;
> +
> +    for (plane = 0; plane < FF_ARRAY_ELEMS(map->address); plane++) {
> +        if (!map->address[plane])
> +            break;
> +
> +        // Events are packed densely: a failed enqueue produces no event,
> +        // so it must not leave an uninitialised slot in the array that is
> +        // later waited on and released.
> +        cle = clEnqueueUnmapMemObject(opencl_get_command_queue(hwfc),
> +                                      (cl_mem)hwmap->source->data[plane],
> +                                      map->address[plane],
> +                                      0, NULL, &events[nb_events]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to unmap OpenCL "
> +                   "image plane %d: %d.\n", plane, cle);
> +            continue;
> +        }
> +        nb_events++;
> +    }
> +
> +    if (nb_events > 0)
> +        opencl_wait_events(hwfc, events, nb_events);
> +
> +    av_free(map);
> +}
> +
> +/**
> + * Map the planes of an OpenCL frame into host memory.
> + *
> + * Every plane of src is mapped with a blocking clEnqueueMapImage() and the
> + * resulting pointer and row pitch are stored in dst.  The mapping is then
> + * registered with ff_hwframe_map_create() so that opencl_unmap_frame()
> + * undoes it later.
> + *
> + * @param hwfc   Frames context of the source frame.
> + * @param dst    Destination frame; its format must equal hwfc->sw_format
> + *               (asserted).
> + * @param src    Source OpenCL frame with one cl_mem image per plane.
> + * @param flags  AV_HWFRAME_MAP_* flags selecting read/write/overwrite.
> + * @return 0 on success, AVERROR(ENOMEM) if the mapping structure cannot
> + *         be allocated, AVERROR(EIO) on an OpenCL failure, or another
> + *         negative error code from the helpers called.
> + */
> +static int opencl_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
> +                            const AVFrame *src, int flags)
> +{
> +    cl_command_queue command_queue = opencl_get_command_queue(hwfc);
> +    cl_map_flags map_flags;
> +    cl_image_format image_format;
> +    cl_image_desc image_desc;
> +    cl_int cle;
> +    OpenCLMapping *map;
> +    size_t origin[3] = { 0, 0, 0 };
> +    size_t region[3];
> +    size_t row_pitch;
> +    cl_event events[MAX_PLANES];
> +    // Number of map events in events[] not yet waited for and released.
> +    int nb_map_events = 0;
> +    int nb_unmap_events = 0;
> +    int err, plane;
> +
> +    av_assert0(hwfc->sw_format == dst->format);
> +
> +    if (flags & AV_HWFRAME_MAP_OVERWRITE &&
> +        !(flags & AV_HWFRAME_MAP_READ)) {
> +        // This is mutually exclusive with the read/write flags, so
> +        // there is no way to map with read here.
> +        map_flags = CL_MAP_WRITE_INVALIDATE_REGION;
> +    } else {
> +        map_flags = 0;
> +        if (flags & AV_HWFRAME_MAP_READ)
> +            map_flags |= CL_MAP_READ;
> +        if (flags & AV_HWFRAME_MAP_WRITE)
> +            map_flags |= CL_MAP_WRITE;
> +    }
> +
> +    map = av_mallocz(sizeof(*map));
> +    if (!map)
> +        return AVERROR(ENOMEM);
> +
> +    // opencl_get_plane_format() signals "no more planes" with ENOENT,
> +    // which is what terminates this loop.
> +    for (plane = 0;; plane++) {
> +        err = opencl_get_plane_format(hwfc->sw_format, plane,
> +                                      src->width, src->height,
> +                                      &image_format, &image_desc);
> +        if (err == AVERROR(ENOENT))
> +            break;
> +        if (err < 0)
> +            goto fail;
> +
> +        region[0] = image_desc.image_width;
> +        region[1] = image_desc.image_height;
> +        region[2] = 1;
> +
> +        map->address[plane] =
> +            clEnqueueMapImage(command_queue,
> +                              (cl_mem)src->data[plane],
> +                              CL_TRUE, map_flags, origin, region,
> +                              &row_pitch, NULL, 0, NULL,
> +                              &events[plane], &cle);
> +        if (!map->address[plane]) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to map OpenCL "
> +                   "image plane %d: %d.\n", plane, cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +        nb_map_events = plane + 1;
> +
> +        dst->data[plane] = map->address[plane];
> +        // The pitch of the mapped region is the one reported by
> +        // clEnqueueMapImage(), not the one in the image description.
> +        dst->linesize[plane] = row_pitch;
> +
> +        av_log(hwfc, AV_LOG_DEBUG, "Map plane %d (%p -> %p).\n",
> +               plane, src->data[plane], dst->data[plane]);
> +    }
> +
> +    err = opencl_wait_events(hwfc, events, plane);
> +    // Whatever the outcome, the map events now belong to
> +    // opencl_wait_events() and must not be touched again here.
> +    nb_map_events = 0;
> +    if (err < 0)
> +        goto fail;
> +
> +    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
> +                                &opencl_unmap_frame, map);
> +    if (err < 0)
> +        goto fail;
> +
> +    dst->width  = src->width;
> +    dst->height = src->height;
> +
> +    return 0;
> +
> +fail:
> +    // Map events which are still outstanding have to be released before
> +    // the array is reused for the unmap events below, otherwise they
> +    // would be overwritten and leaked.
> +    if (nb_map_events > 0)
> +        opencl_wait_events(hwfc, events, nb_map_events);
> +
> +    for (plane = 0; plane < MAX_PLANES; plane++) {
> +        if (!map->address[plane])
> +            break;
> +        // Pack events densely so that a failed enqueue does not leave an
> +        // uninitialised entry to be waited on.
> +        cle = clEnqueueUnmapMemObject(command_queue,
> +                                      (cl_mem)src->data[plane],
> +                                      map->address[plane],
> +                                      0, NULL, &events[nb_unmap_events]);
> +        if (cle != CL_SUCCESS) {
> +            av_log(hwfc, AV_LOG_ERROR, "Failed to unmap OpenCL "
> +                   "image plane %d: %d.\n", plane, cle);
> +            continue;
> +        }
> +        nb_unmap_events++;
> +    }
> +    if (nb_unmap_events > 0)
> +        opencl_wait_events(hwfc, events, nb_unmap_events);
> +    av_freep(&map);
> +    return err;
> +}
> +
> +/**
> + * Map a frame of some other format to an OpenCL frame.
> + *
> + * No source format is handled, so after asserting that dst is an OpenCL
> + * frame this always returns AVERROR(ENOSYS).
> + */
> +static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
> +                         const AVFrame *src, int flags)
> +{
> +    av_assert0(dst->format == AV_PIX_FMT_OPENCL);
> +
> +    // Whatever src->format is, there is no mapping implemented for it.
> +    return AVERROR(ENOSYS);
> +}
> +
> +/**
> + * Map an OpenCL frame to a frame of another format.
> + *
> + * The only supported destination is the frames context's own software
> + * format, which is handled by opencl_map_frame().  Anything else yields
> + * AVERROR(ENOSYS).
> + */
> +static int opencl_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
> +                           const AVFrame *src, int flags)
> +{
> +    av_assert0(src->format == AV_PIX_FMT_OPENCL);
> +
> +    if (dst->format == hwfc->sw_format)
> +        return opencl_map_frame(hwfc, dst, src, flags);
> +
> +    return AVERROR(ENOSYS);
> +}
> +
> +// Descriptor tying the OpenCL callbacks in this file to the generic
> +// hwcontext layer.
> +const HWContextType ff_hwcontext_type_opencl = {
> +    .type                   = AV_HWDEVICE_TYPE_OPENCL,
> +    .name                   = "OpenCL",
> +
> +    // Sizes of the public device context, the private device context and
> +    // the public frames context structures.
> +    .device_hwctx_size      = sizeof(AVOpenCLDeviceContext),
> +    .device_priv_size       = sizeof(OpenCLDeviceContext),
> +    .frames_hwctx_size      = sizeof(AVOpenCLFramesContext),
> +
> +    .device_create          = &opencl_device_create,
> +    .device_init            = &opencl_device_init,
> +    .device_uninit          = &opencl_device_uninit,
> +
> +    .frames_get_constraints = &opencl_frames_get_constraints,
> +    .frames_init            = &opencl_frames_init,
> +    // No frames_uninit callback is provided.
> +    .frames_uninit          = NULL,
> +    .frames_get_buffer      = &opencl_get_buffer,
> +
> +    .transfer_get_formats   = &opencl_transfer_get_formats,
> +    .transfer_data_to       = &opencl_transfer_data_to,
> +    .transfer_data_from     = &opencl_transfer_data_from,
> +
> +    .map_to                 = &opencl_map_to,
> +    .map_from               = &opencl_map_from,
> +
> +    // Pixel format list, terminated by AV_PIX_FMT_NONE.
> +    .pix_fmts = (const enum AVPixelFormat[]) {
> +        AV_PIX_FMT_OPENCL,
> +        AV_PIX_FMT_NONE
> +    },
> +};
> diff --git a/libavutil/hwcontext_opencl.h b/libavutil/hwcontext_opencl.h
> new file mode 100644
> index 000000000..955c1797e
> --- /dev/null
> +++ b/libavutil/hwcontext_opencl.h
> @@ -0,0 +1,78 @@
> +/*
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_HWCONTEXT_OPENCL_H
> +#define AVUTIL_HWCONTEXT_OPENCL_H
> +
> +#include <CL/cl.h>
> +
> +/**
> + * @file
> + * API-specific header for AV_HWDEVICE_TYPE_OPENCL.
> + *
> + * Pools allocated internally are always dynamic, and are primarily intended
> + * to be used in OpenCL-only cases.  If interoperation is required, it is
> + * typically required to allocate frames in the other API and then map the
> + * frames context to OpenCL with av_hwframe_ctx_create_derived().
> + *
> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
> + * with the data pointer pointing at a cl_mem object which is a buffer large
> + * enough to contain all of the data of the intended frame.  If the frame
> + * has multiple planes, the images are created in sub-buffers of the buffer
> + * in the pool.
> + */
> +
> +/**
> + * OpenCL device details.
> + *
> + * Allocated as AVHWDeviceContext.hwctx
> + */
> +typedef struct AVOpenCLDeviceContext {
> +    cl_platform_id platform_id;
> +    cl_device_id   device_id;
> +    /**
> +     * The OpenCL context for all operations on this device.
> +     */
> +    cl_context       context;
> +    /**
> +     * The default command queue for this device, which will be used by all
> +     * frames contexts which do not have their own command queue.  If not
> +     * initialised by the user, a default queue will be created.
> +     */
> +    cl_command_queue command_queue;
> +} AVOpenCLDeviceContext;
> +
> +/**
> + * OpenCL-specific data associated with a frame pool.
> + *
> + * Allocated as AVHWFramesContext.hwctx.
> + */
> +typedef struct AVOpenCLFramesContext {
> +    /**
> +     * The command queue used for internal asynchronous operations on this
> +     * device (av_hwframe_transfer_data(), av_hwframe_map()).  Note that
> +     * currently these are made synchronous by waiting for all of the
> +     * enqueued events with clWaitForEvents() at the end of every
> +     * operation sequence.
> +     *
> +     * If this is not set, the command queue from the associated device is
> +     * used instead.
> +     */
> +    cl_command_queue command_queue;
> +} AVOpenCLFramesContext;
> +
> +#endif /* AVUTIL_HWCONTEXT_OPENCL_H */
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 0f2b684fa..7c0e85249 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -54,7 +54,7 @@
>   */
>  
>  #define LIBAVUTIL_VERSION_MAJOR 55
> -#define LIBAVUTIL_VERSION_MINOR 33
> +#define LIBAVUTIL_VERSION_MINOR 34
>  #define LIBAVUTIL_VERSION_MICRO  0
>  
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \

Can't comment much on the actual code here.

Isn't it a bit strange that we also have hwcontext_cuda, and they don't
interact at all?

Those complex command queue operations might need a lock around them to
make them thread-safe. (I sure as hell don't want to need fragile
external locking around this API just because I might do
decoding/filtering/rendering on different threads to some degree.)

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to