On 20/02/17 06:52, wm4 wrote:
> On Sun, 19 Feb 2017 18:46:42 +0000
> Mark Thompson <[email protected]> wrote:
> 
>> Uses the cl_intel_va_api_media_sharing extension, which supports only
>> NV12 surfaces and only mapping from QSV to OpenCL.
> 
> No P010?

Nope, NV12 only: 
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_va_api_media_sharing.txt>.

(Has anyone else ever wanted to do it?  Beignet didn't immediately work, I had 
to send them 
<https://cgit.freedesktop.org/beignet/commit/?id=3c31216a4ef148ba2e6048e30c62a7a7209407cb>.)

>> ---
>>  configure                    |   5 +
>>  libavutil/hwcontext_opencl.c | 246 
>> +++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 251 insertions(+)
>>
>> diff --git a/configure b/configure
>> index a6933288b..10f2fdaf7 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1706,6 +1706,7 @@ HAVE_LIST="
>>      $TYPES_LIST
>>      dos_paths
>>      dxva2_lib
>> +    intel_opencl_qsv
>>      intel_opencl_vaapi
>>      libc_msvcrt
>>      MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS
>> @@ -4826,6 +4827,10 @@ enabled vaapi &&
>>  if enabled opencl && enabled vaapi ; then
>>      check_type "CL/cl_intel.h" "clCreateImageFromFdINTEL_fn" &&
>>          enable intel_opencl_vaapi
>> +    if enabled libmfx ; then
>> +        check_type "CL/cl.h CL/va_ext.h" 
>> "clCreateFromVA_APIMediaSurfaceINTEL_fn" &&
>> +            enable intel_opencl_qsv
>> +    fi
>>  fi
>>  
>>  enabled vdpau &&
>> diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
>> index 7d14052b8..f380b35af 100644
>> --- a/libavutil/hwcontext_opencl.c
>> +++ b/libavutil/hwcontext_opencl.c
>> @@ -37,6 +37,13 @@
>>  #include "hwcontext_vaapi.h"
>>  #endif
>>  
>> +#if HAVE_INTEL_OPENCL_QSV
>> +#include <mfx/mfxstructures.h>
>> +#include <va/va.h>
>> +#include <CL/va_ext.h>
>> +#include "hwcontext_vaapi.h"
>> +#endif
>> +
>>  // The maximum number of planes in an image.  This must be structly
>>  // less than AV_NUM_DATA_POINTERS because we place the whole-frame
>>  // reference in a buffer entry after the final plane.  For now, four
>> @@ -57,6 +64,16 @@ typedef struct OpenCLDeviceContext {
>>      clCreateImageFromFdINTEL_fn  clCreateImageFromFdINTEL;
>>      clCreateBufferFromFdINTEL_fn clCreateBufferFromFdINTEL;
>>  #endif
>> +
>> +#if HAVE_INTEL_OPENCL_QSV
>> +    int qsv_enabled;
>> +    clCreateFromVA_APIMediaSurfaceINTEL_fn
>> +        clCreateFromVA_APIMediaSurfaceINTEL;
>> +    clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn
>> +        clEnqueueAcquireVA_APIMediaSurfacesINTEL;
>> +    clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn
>> +        clEnqueueReleaseVA_APIMediaSurfacesINTEL;
>> +#endif
>>  } OpenCLDeviceContext;
>>  
>>  static void opencl_error_callback(const char *errinfo,
>> @@ -325,6 +342,71 @@ static int opencl_device_init(AVHWDeviceContext *hwdev)
>>      }
>>  #endif
>>  
>> +#if HAVE_INTEL_OPENCL_QSV
>> +    {
>> +        size_t props_size;
>> +        cl_context_properties *props = NULL;
>> +        VADisplay va_display;
>> +        int i;
>> +
>> +        cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES,
>> +                               0, NULL, &props_size);
>> +        if (cle != CL_SUCCESS) {
>> +            av_log(hwdev, AV_LOG_ERROR, "Failed to get context "
>> +                   "properties: %d.\n", cle);
>> +            goto no_qsv;
>> +        }
>> +
>> +        props = av_malloc(props_size);
>> +        if (!props)
>> +            return AVERROR(ENOMEM);
>> +
>> +        cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES,
>> +                               props_size, props, NULL);
>> +        if (cle != CL_SUCCESS) {
>> +            av_log(hwdev, AV_LOG_ERROR, "Failed to get context "
>> +                   "properties: %d.\n", cle);
>> +            goto no_qsv;
>> +        }
>> +
>> +        va_display = NULL;
>> +        for (i = 0; i < (props_size / sizeof(*props) - 1); i++) {
>> +            if (props[i] == CL_CONTEXT_VA_API_DISPLAY_INTEL) {
>> +                va_display = (VADisplay)(intptr_t)props[i+1];
>> +                break;
>> +            }
>> +        }
>> +        if (!va_display) {
>> +            av_log(hwdev, AV_LOG_ERROR, "Media sharing must be "
>> +                   "enabled on context creation to use QSV to "
>> +                   "OpenCL mapping.\n");
>> +            goto no_qsv;
>> +        }
>> +        if (!vaDisplayIsValid(va_display)) {
>> +            av_log(hwdev, AV_LOG_ERROR, "A valid VADisplay is "
>> +                   "required on context creation to use QSV to "
>> +                   "OpenCL mapping.\n");
>> +            goto no_qsv;
>> +        }
> 
> (Why can the display be invalid?)

If the user made the context with CL_CONTEXT_VA_API_DISPLAY_INTEL to 
clCreateContext() but didn't get the argument right.  Really just a debugging 
aid for user initialisation.

>> +
>> +        CL_FUNC(clCreateFromVA_APIMediaSurfaceINTEL,
>> +                "Intel QSV to OpenCL mapping", no_qsv);
>> +        CL_FUNC(clEnqueueAcquireVA_APIMediaSurfacesINTEL,
>> +                "Intel QSV in OpenCL acquire", no_qsv);
>> +        CL_FUNC(clEnqueueReleaseVA_APIMediaSurfacesINTEL,
>> +                "Intel QSV in OpenCL release", no_qsv);
>> +
>> +        ctx->qsv_enabled = 1;
>> +        if (0) {
>> +        no_qsv:
>> +            av_log(hwdev, AV_LOG_ERROR, "QSV to OpenCL mapping "
>> +                   "not usable.\n");
>> +            ctx->qsv_enabled = 0;
>> +        }
>> +        av_free(props);
>> +    }
>> +#endif
>> +
>>  #undef CL_FUNC
>>  
>>      return 0;
>> @@ -360,6 +442,30 @@ static int opencl_device_derive(AVHWDeviceContext *ctx,
>>              err = opencl_device_init(ctx);
>>          break;
>>  #endif
>> +#if HAVE_INTEL_OPENCL_QSV
>> +        // The generic code automatically attempts to derive from all
>> +        // ancestors of the given device, so we can ignore QSV devices
>> +        // here and just consider the inner device it was derived from.
>> +    case AV_HWDEVICE_TYPE_VAAPI:
>> +        {
>> +            AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
>> +            cl_context_properties props[5] = {
>> +                CL_CONTEXT_VA_API_DISPLAY_INTEL,
>> +                (intptr_t)src_hwctx->display,
>> +                CL_CONTEXT_INTEROP_USER_SYNC,
>> +                CL_FALSE,
>> +                0,
>> +            };
>> +            err = av_dict_set(&opts, "device_extensions",
>> +                              "cl_intel_va_api_media_sharing", 0);
>> +            if (err >= 0)
>> +                err = opencl_device_create_internal(ctx, NULL, opts,
>> +                                                    0, props);
>> +            if (err >= 0)
>> +                err = opencl_device_init(ctx);
>> +        }
>> +        break;
>> +#endif
>>      default:
>>          err = AVERROR(ENOSYS);
>>          break;
>> @@ -1362,6 +1468,141 @@ fail:
>>  
>>  #endif
>>  
>> +#if HAVE_INTEL_OPENCL_QSV
>> +
>> +typedef struct QSVtoOpenCLMapping {
>> +    int nb_planes;
>> +    cl_mem plane[MAX_PLANES];
>> +} QSVtoOpenCLMapping;
>> +
>> +static void opencl_unmap_from_qsv(AVHWFramesContext *dst_fc,
>> +                                  HWMapDescriptor *hwmap)
>> +{
>> +    QSVtoOpenCLMapping *mapping = hwmap->priv;
>> +    OpenCLDeviceContext    *ctx = dst_fc->device_ctx->internal->priv;
>> +    cl_event event;
>> +    cl_int cle;
>> +    int i;
>> +
>> +    av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV/VAAPI surface from OpenCL.\n");
>> +
>> +    cle = ctx->clEnqueueReleaseVA_APIMediaSurfacesINTEL(
>> +        ctx->command_queue, mapping->nb_planes, mapping->plane,
>> +        0, NULL, &event);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface "
>> +               "handle: %d.\n", cle);
>> +    }
>> +
>> +    opencl_wait_events(dst_fc, &event, 1);
>> +
>> +    for (i = 0; i < mapping->nb_planes; i++) {
>> +        cle = clReleaseMemObject(mapping->plane[i]);
>> +        if (cle != CL_SUCCESS) {
>> +            av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL "
>> +                   "image of plane %d of QSV/VAAPI surface.\n", cle);
>> +        }
>> +    }
>> +
>> +    av_free(mapping);
>> +}
>> +
>> +static int opencl_map_from_qsv(AVHWFramesContext *dst_fc, AVFrame *dst,
>> +                               const AVFrame *src, int flags)
>> +{
>> +    AVHWFramesContext      *src_fc =
>> +        (AVHWFramesContext*)src->hw_frames_ctx->data;
>> +    AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx;
>> +    OpenCLDeviceContext       *ctx = dst_fc->device_ctx->internal->priv;
>> +    QSVtoOpenCLMapping    *mapping = NULL;
>> +    VASurfaceID va_surface;
>> +    cl_mem_flags cl_flags;
>> +    cl_event event;
>> +    cl_int cle;
>> +    int err, i;
>> +
>> +    if (src->format == AV_PIX_FMT_QSV) {
>> +        mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3];
>> +        va_surface = *(VASurfaceID*)mfx_surface->Data.MemId;
> 
> Again the somewhat duplicated and "unportable" MemId access.

This is in a VAAPI-only function, in spite of the name.

I think I need to rename some of this stuff to be clearer.  This is really 
Intel proprietary VAAPI only (and usable as QSV-wrapping-VAAPI), and DXVA2 will 
need separate functions.

So maybe:

HAVE_INTEL_OPENCL_VAAPI -> HAVE_INTEL_BEIGNET_OPENCL_VAAPI
HAVE_INTEL_OPENCL_QSV   -> HAVE_INTEL_MEDIA_OPENCL_VAAPI

then also HAVE_OPENCL_DXVA2 (which is a standard extension, so not INTEL).

>> +    } else if (src->format == AV_PIX_FMT_VAAPI) {
>> +        va_surface = (VASurfaceID)(uintptr_t)src->data[3];
>> +    } else {
>> +        return AVERROR(ENOSYS);
>> +    }
>> +
>> +    if (flags & (AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_WRITE))
>> +        cl_flags = CL_MEM_READ_WRITE;
>> +    else if (flags & AV_HWFRAME_MAP_READ)
>> +        cl_flags = CL_MEM_READ_ONLY;
>> +    else if (flags & AV_HWFRAME_MAP_WRITE)
>> +        cl_flags = CL_MEM_WRITE_ONLY;
>> +    else
>> +        return AVERROR(EINVAL);
> 
> We don't have this flag-mapping somewhere else already?

The similar use in opencl_map_frame() is different, because it allows more flag 
combinations.

>> +
>> +    av_log(src_fc, AV_LOG_DEBUG, "Map QSV/VAAPI surface %#x to "
>> +           "OpenCL.\n", va_surface);
>> +
>> +    mapping = av_mallocz(sizeof(*mapping));
>> +    if (!mapping) {
>> +        err = AVERROR(ENOMEM);
>> +        goto fail;
>> +    }
>> +
>> +    // The cl_intel_va_api_media_sharing extension only supports NV12
>> +    // surfaces, so for now there are always exactly two planes.
>> +    mapping->nb_planes = 2;
>> +
>> +    for (i = 0; i < mapping->nb_planes; i++) {
>> +        mapping->plane[i] = ctx->clCreateFromVA_APIMediaSurfaceINTEL(
>> +            dst_dev->context, cl_flags, &va_surface, i, &cle);
>> +        if (!mapping->plane[i]) {
>> +            av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL "
>> +                   "image from plane %d of QSV/VAAPI surface "
>> +                   "%#x: %d.\n", i, va_surface, cle);
>> +            err = AVERROR(EIO);
>> +            goto fail;
>> +        }
>> +
>> +        dst->data[i]     = (uint8_t*)mapping->plane[i];
>> +        dst->linesize[i] = src_fc->width;
>> +    }
>> +
>> +    cle = ctx->clEnqueueAcquireVA_APIMediaSurfacesINTEL(
>> +        ctx->command_queue, mapping->nb_planes, mapping->plane,
>> +        0, NULL, &event);
>> +    if (cle != CL_SUCCESS) {
>> +        av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface "
>> +               "handle: %d.\n", cle);
>> +        err = AVERROR(EIO);
>> +        goto fail;
>> +    }
>> +
>> +    err = opencl_wait_events(dst_fc, &event, 1);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
>> +                                &opencl_unmap_from_qsv, mapping);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    dst->width  = src->width;
>> +    dst->height = src->height;
>> +
>> +    return 0;
>> +
>> +fail:
>> +    if (mapping) {
>> +        for (i = 0; i < mapping->nb_planes; i++)
>> +            if (mapping->plane[i])
>> +                clReleaseMemObject(mapping->plane[i]);
>> +    }
>> +    av_freep(&mapping);
>> +    return err;
>> +}
>> +
>> +#endif
>> +
>>  
>>  static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
>>                           const AVFrame *src, int flags)
>> @@ -1372,6 +1613,11 @@ static int opencl_map_to(AVHWFramesContext *hwfc, 
>> AVFrame *dst,
>>      case AV_PIX_FMT_VAAPI:
>>          return opencl_map_from_vaapi(hwfc, dst, src, flags);
>>  #endif
>> +#if HAVE_INTEL_OPENCL_QSV
>> +    case AV_PIX_FMT_QSV:
>> +    case AV_PIX_FMT_VAAPI:
>> +        return opencl_map_from_qsv(hwfc, dst, src, flags);
>> +#endif
>>      default:
>>          return AVERROR(ENOSYS);
>>      }
> 
> Can't comment much. The ifdeffery and complexity of hwcontext_opencl
> seems a bit worrying, maybe calling for a more structured approach.

It could be split across multple files, but there is a lot of shared code so 
quite a bit of stuff would have to end up in private headers, which didn't seem 
very nice.  Or did you have something else in mind?
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to