On 20/02/17 06:39, wm4 wrote:
> On Sun, 19 Feb 2017 18:46:40 +0000
> Mark Thompson <[email protected]> wrote:
> 
>> ---
>>  configure                      |   3 +
>>  doc/APIchanges                 |   4 +
>>  libavutil/Makefile             |   2 +
>>  libavutil/hwcontext.c          |   3 +
>>  libavutil/hwcontext.h          |   1 +
>>  libavutil/hwcontext_internal.h |   1 +
>>  libavutil/hwcontext_opencl.c   | 983 +++++++++++++++++++++++++++++++++++++++++
>>  libavutil/hwcontext_opencl.h   |  78 ++++
>>  libavutil/version.h            |   2 +-
>>  9 files changed, 1076 insertions(+), 1 deletion(-)
>>  create mode 100644 libavutil/hwcontext_opencl.c
>>  create mode 100644 libavutil/hwcontext_opencl.h
>>
>> ...
>> +
>> +static int opencl_get_plane_format(enum AVPixelFormat pixfmt,
>> +                                   int plane, int width, int height,
>> +                                   cl_image_format *image_format,
>> +                                   cl_image_desc *image_desc)
>> +{
>> +    av_assert0(image_format && image_desc);
>> +
>> +    memset(image_format, 0, sizeof(*image_format));
>> +    memset(image_desc,   0, sizeof(*image_desc));
>> +    image_desc->image_type = CL_MEM_OBJECT_IMAGE2D;
>> +
>> +    switch (pixfmt) {
>> +    case AV_PIX_FMT_YUV420P:
>> +        if (width % 2)
>> +            return AVERROR(EINVAL);
>> +        if (plane > 2)
>> +            return AVERROR(ENOENT);
>> +        image_format->image_channel_order     = CL_R;
>> +        image_format->image_channel_data_type = CL_UNORM_INT8;
>> +        image_desc->image_width     = width  / (1 + (plane > 0));
>> +        image_desc->image_height    = height / (1 + (plane > 0));
>> +        image_desc->image_row_pitch = image_desc->image_width;
>> +        break;
>> +
>> +    case AV_PIX_FMT_NV12:
>> +        if (width % 2)
>> +            return AVERROR(EINVAL);
>> +        if (plane > 1)
>> +            return AVERROR(ENOENT);
>> +        image_format->image_channel_order     = plane ? CL_RG : CL_R;
>> +        image_format->image_channel_data_type = CL_UNORM_INT8;
>> +        image_desc->image_width     = width  / (1 + plane);
>> +        image_desc->image_height    = height / (1 + plane);
>> +        image_desc->image_row_pitch = width;
>> +        break;
>> +
>> +    case AV_PIX_FMT_P010:
>> +        if (width % 2)
>> +            return AVERROR(EINVAL);
>> +        if (plane > 1)
>> +            return AVERROR(ENOENT);
>> +        image_format->image_channel_order     = plane ? CL_RG : CL_R;
>> +        image_format->image_channel_data_type = CL_UNORM_INT16;
>> +        image_desc->image_width     = width  / (1 + plane);
>> +        image_desc->image_height    = height / (1 + plane);
>> +        image_desc->image_row_pitch = 2 * width;
>> +        break;
>> +
>> +    case AV_PIX_FMT_RGBA:
>> +        image_format->image_channel_order = CL_RGBA;
>> +        if (0)
>> +    case AV_PIX_FMT_BGRA:
>> +        image_format->image_channel_order = CL_BGRA;
>> +        if (0)
>> +    case AV_PIX_FMT_ARGB:
>> +        image_format->image_channel_order = CL_ARGB;
>> +#ifdef CL_ABGR
>> +        if (0)
> 
> wut

CL_ABGR didn't exist in OpenCL 1.2, which is really the target.  It's present 
in later versions, though, so it's included here if possible for symmetry.

>> +    case AV_PIX_FMT_ABGR:
>> +        image_format->image_channel_order = CL_ABGR;
>> +#endif
>> +        if (plane > 0)
>> +            return AVERROR(ENOENT);
>> +        image_format->image_channel_data_type = CL_UNORM_INT8;
>> +        image_desc->image_width     = width;
>> +        image_desc->image_height    = height;
>> +        image_desc->image_row_pitch = 4 * width;
>> +        break;
>> +
>> +    default:
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int opencl_frames_get_constraints(AVHWDeviceContext *hwdev,
>> +                                         const void *hwconfig,
>> +                                         AVHWFramesConstraints *constraints)
>> +{
>> +    AVOpenCLDeviceContext *hwctx = hwdev->hwctx;
>> +    cl_uint nb_image_formats;
>> +    cl_image_format *image_formats = NULL;
>> +    cl_int cle;
>> +    enum AVPixelFormat pix_fmt;
>> +    int err, pix_fmts_found;
>> +    size_t max_width, max_height;
>> +
>> +    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH,
>> +                          sizeof(max_width), &max_width, NULL);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
>> +               "supported image width: %d.\n", cle);
>> +    } else {
>> +        constraints->max_width = max_width;
>> +    }
>> +    cle = clGetDeviceInfo(hwctx->device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
>> +                          sizeof(max_height), &max_height, NULL);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query maximum "
>> +               "supported image height: %d.\n", cle);
>> +    } else {
>> +        constraints->max_height = max_height;
>> +    }
>> +    av_log(hwdev, AV_LOG_DEBUG, "Maximum supported image size %dx%d.\n",
>> +           constraints->max_width, constraints->max_height);
>> +
>> +    cle = clGetSupportedImageFormats(hwctx->context, 0,
>> +                                     CL_MEM_OBJECT_IMAGE2D,
>> +                                     0, NULL, &nb_image_formats);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
>> +               "image formats: %d.\n", cle);
>> +        err = AVERROR(ENOSYS);
>> +        goto fail;
>> +    }
>> +    if (nb_image_formats == 0) {
>> +        av_log(hwdev, AV_LOG_ERROR, "No image support in OpenCL "
>> +               "driver (zero supported image formats).\n");
>> +        err = AVERROR(ENOSYS);
>> +        goto fail;
>> +    }
>> +
>> +    image_formats =
>> +        av_malloc(nb_image_formats * sizeof(*image_formats));
>> +    if (!image_formats) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    cle = clGetSupportedImageFormats(hwctx->context, 0,
>> +                                     CL_MEM_OBJECT_IMAGE2D,
>> +                                     nb_image_formats,
>> +                                     image_formats, NULL);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(hwdev, AV_LOG_ERROR, "Failed to query supported "
>> +               "image formats: %d.\n", cle);
>> +        err = AVERROR(ENOSYS);
>> +        goto fail;
>> +    }
>> +
>> +    pix_fmts_found = 0;
>> +    for (pix_fmt = 0; pix_fmt < AV_PIX_FMT_NB; pix_fmt++) {
> 
> Multiple API calls, looping over _all_ libavutil pixfmts... seems to do
> a lot of work for a call you'd expect to be fast and often called.
> Maybe not a problem.

Often?  I'd expect this to be called once in some setup function for each 
component and never thereafter unless something changes.  Also, nothing in the 
loop calls anything external, except possibly realloc for valid formats.  (The 
opencl_get_plane_format() function is trying to be the one place where you need 
to add new formats.)

>> +        cl_image_format image_format;
>> +        cl_image_desc   image_desc;
>> +        int plane, i;
>> +
>> +        for (plane = 0;; plane++) {
>> +            err = opencl_get_plane_format(pix_fmt, plane, 0, 0,
>> +                                          &image_format,
>> +                                          &image_desc);
>> +            if (err < 0)
>> +                break;
>> +
>> +            for (i = 0; i < nb_image_formats; i++) {
>> +                if (image_formats[i].image_channel_order ==
>> +                    image_format.image_channel_order &&
>> +                    image_formats[i].image_channel_data_type ==
>> +                    image_format.image_channel_data_type)
>> +                    break;
>> +            }
>> +            if (i == nb_image_formats) {
>> +                err = AVERROR(EINVAL);
>> +                break;
>> +            }
>> +        }
>> +        if (err != AVERROR(ENOENT))
>> +            continue;
>> +
>> +        av_log(hwdev, AV_LOG_DEBUG, "Format %s supported.\n",
>> +               av_get_pix_fmt_name(pix_fmt));
>> +
>> +        constraints->valid_sw_formats =
>> +            av_realloc_array(constraints->valid_sw_formats,
>> +                             pix_fmts_found + 2,
>> +                             sizeof(*constraints->valid_sw_formats));
>> +        if (!constraints->valid_sw_formats) {
>> +            err = AVERROR(ENOMEM);
>> +            goto fail;
>> +        }
>> +        constraints->valid_sw_formats[pix_fmts_found] = pix_fmt;
>> +        constraints->valid_sw_formats[pix_fmts_found + 1] =
>> +            AV_PIX_FMT_NONE;
>> +        ++pix_fmts_found;
> 
> (Don't mind me, just stylistically objecting to pre-increment.)

(Well, don't mind me not caring, then.)

>> +    }
>> +
>> +    av_freep(&image_formats);
>> +
>> +    constraints->valid_hw_formats =
>> +        av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
>> +    if (!constraints->valid_hw_formats) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +    constraints->valid_hw_formats[0] = AV_PIX_FMT_OPENCL;
>> +    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
>> +
>> +    return 0;
>> +
>> +fail:
>> +    av_freep(&image_formats);
>> +    return err;
>> +}
>> ...
>> diff --git a/libavutil/hwcontext_opencl.h b/libavutil/hwcontext_opencl.h
>> new file mode 100644
>> index 000000000..955c1797e
>> --- /dev/null
>> +++ b/libavutil/hwcontext_opencl.h
>> @@ -0,0 +1,78 @@
>> +/*
>> + * This file is part of Libav.
>> + *
>> + * Libav is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * Libav is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with Libav; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVUTIL_HWCONTEXT_OPENCL_H
>> +#define AVUTIL_HWCONTEXT_OPENCL_H
>> +
>> +#include <CL/cl.h>
>> +
>> +/**
>> + * @file
>> + * API-specific header for AV_HWDEVICE_TYPE_OPENCL.
>> + *
>> + * Pools allocated internally are always dynamic, and are primarily intended
>> + * to be used in OpenCL-only cases.  If interoperation is required, it is
>> + * typically required to allocate frames in the other API and then map the
>> + * frames context to OpenCL with av_hwframe_ctx_create_derived().
>> + *
>> + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
>> + * with the data pointer pointing at a cl_mem object which is a buffer large
>> + * enough to contain all of the data of the intended frame.  If the frame
>> + * has multiple planes, the images are created in sub-buffers of the buffer
>> + * in the pool.
>> + */
>> +
>> +/**
>> + * OpenCL device details.
>> + *
>> + * Allocated as AVHWDeviceContext.hwctx
>> + */
>> +typedef struct AVOpenCLDeviceContext {
>> +    cl_platform_id platform_id;
>> +    cl_device_id   device_id;
>> +    /**
>> +     * The OpenCL context for all operations on this device.
>> +     */
>> +    cl_context       context;
>> +    /**
>> +     * The default command queue for this device, which will be used by all
>> +     * frames contexts which do not have their own command queue.  If not
>> +     * initialised by the user, a default queue will be created.
>> +     */
>> +    cl_command_queue command_queue;
>> +} AVOpenCLDeviceContext;
>> +
>> +/**
>> + * OpenCL-specific data associated with a frame pool.
>> + *
>> + * Allocated as AVHWFramesContext.hwctx.
>> + */
>> +typedef struct AVOpenCLFramesContext {
>> +    /**
>> +     * The command queue used for internal asynchronous operations on this
>> +     * device (av_hwframe_transfer_data(), av_hwframe_map()).  Note that
>> +     * currently these are made synchronous by calling clFinish() at the
>> +     * end of every operation sequence.
>> +     *
>> +     * If this is not set, the command queue from the associated device is
>> +     * used instead.
>> +     */
>> +    cl_command_queue command_queue;
>> +} AVOpenCLFramesContext;
>> +
>> +#endif /* AVUTIL_HWCONTEXT_OPENCL_H */
>> diff --git a/libavutil/version.h b/libavutil/version.h
>> index 0f2b684fa..7c0e85249 100644
>> --- a/libavutil/version.h
>> +++ b/libavutil/version.h
>> @@ -54,7 +54,7 @@
>>   */
>>  
>>  #define LIBAVUTIL_VERSION_MAJOR 55
>> -#define LIBAVUTIL_VERSION_MINOR 33
>> +#define LIBAVUTIL_VERSION_MINOR 34
>>  #define LIBAVUTIL_VERSION_MICRO  0
>>  
>>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
> 
> Can't comment much on the actual code here.
> 
> Isn't it a bit strange that we also have hwcontext_cuda, and they don't
> interact at all?

Someone should add mapping between them...

> Those complex command queue operations might need a lock around them to
> make them thread-safe. (I sure as hell don't want to need fragile
> external locking around this API just because I might do
> decoding/filtering/rendering on different threads to some degree.)

OpenCL is meant to be thread-safe throughout - I don't think there is anything 
more to do?  (Assuming correctness of drivers, of course.)

There may be surprises wrt ordering/blocking if you let everything operate on 
the same command queue, but that is why the user-supplied command queues exist.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to