On 20/02/17 06:52, wm4 wrote: > On Sun, 19 Feb 2017 18:46:42 +0000 > Mark Thompson <[email protected]> wrote: > >> Uses the cl_intel_va_api_media_sharing extension, which supports only >> NV12 surfaces and only mapping from QSV to OpenCL. > > No P010?
Nope, NV12 only: <https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_va_api_media_sharing.txt>. (Has anyone else ever wanted to do it? Beignet didn't immediately work, I had to send them <https://cgit.freedesktop.org/beignet/commit/?id=3c31216a4ef148ba2e6048e30c62a7a7209407cb>.) >> --- >> configure | 5 + >> libavutil/hwcontext_opencl.c | 246 >> +++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 251 insertions(+) >> >> diff --git a/configure b/configure >> index a6933288b..10f2fdaf7 100755 >> --- a/configure >> +++ b/configure >> @@ -1706,6 +1706,7 @@ HAVE_LIST=" >> $TYPES_LIST >> dos_paths >> dxva2_lib >> + intel_opencl_qsv >> intel_opencl_vaapi >> libc_msvcrt >> MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS >> @@ -4826,6 +4827,10 @@ enabled vaapi && >> if enabled opencl && enabled vaapi ; then >> check_type "CL/cl_intel.h" "clCreateImageFromFdINTEL_fn" && >> enable intel_opencl_vaapi >> + if enabled libmfx ; then >> + check_type "CL/cl.h CL/va_ext.h" >> "clCreateFromVA_APIMediaSurfaceINTEL_fn" && >> + enable intel_opencl_qsv >> + fi >> fi >> >> enabled vdpau && >> diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c >> index 7d14052b8..f380b35af 100644 >> --- a/libavutil/hwcontext_opencl.c >> +++ b/libavutil/hwcontext_opencl.c >> @@ -37,6 +37,13 @@ >> #include "hwcontext_vaapi.h" >> #endif >> >> +#if HAVE_INTEL_OPENCL_QSV >> +#include <mfx/mfxstructures.h> >> +#include <va/va.h> >> +#include <CL/va_ext.h> >> +#include "hwcontext_vaapi.h" >> +#endif >> + >> // The maximum number of planes in an image. This must be structly >> // less than AV_NUM_DATA_POINTERS because we place the whole-frame >> // reference in a buffer entry after the final plane. For now, four >> @@ -57,6 +64,16 @@ typedef struct OpenCLDeviceContext { >> clCreateImageFromFdINTEL_fn clCreateImageFromFdINTEL; >> clCreateBufferFromFdINTEL_fn clCreateBufferFromFdINTEL; >> #endif >> + >> +#if HAVE_INTEL_OPENCL_QSV >> + int qsv_enabled; >> + clCreateFromVA_APIMediaSurfaceINTEL_fn >> + clCreateFromVA_APIMediaSurfaceINTEL; >> + clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn >> + clEnqueueAcquireVA_APIMediaSurfacesINTEL; >> + clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn >> + clEnqueueReleaseVA_APIMediaSurfacesINTEL; >> +#endif >> } OpenCLDeviceContext; >> >> static void opencl_error_callback(const char *errinfo, >> @@ -325,6 +342,71 @@ static int opencl_device_init(AVHWDeviceContext *hwdev) >> } >> #endif >> >> +#if HAVE_INTEL_OPENCL_QSV >> + { >> + size_t props_size; >> + cl_context_properties *props = NULL; >> + VADisplay va_display; >> + int i; >> + >> + cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES, >> + 0, NULL, &props_size); >> + if (cle != CL_SUCCESS) { >> + av_log(hwdev, AV_LOG_ERROR, "Failed to get context " >> + "properties: %d.\n", cle); >> + goto no_qsv; >> + } >> + >> + props = av_malloc(props_size); >> + if (!props) >> + return AVERROR(ENOMEM); >> + >> + cle = clGetContextInfo(hwctx->context, CL_CONTEXT_PROPERTIES, >> + props_size, props, NULL); >> + if (cle != CL_SUCCESS) { >> + av_log(hwdev, AV_LOG_ERROR, "Failed to get context " >> + "properties: %d.\n", cle); >> + goto no_qsv; >> + } >> + >> + va_display = NULL; >> + for (i = 0; i < (props_size / sizeof(*props) - 1); i++) { >> + if (props[i] == CL_CONTEXT_VA_API_DISPLAY_INTEL) { >> + va_display = (VADisplay)(intptr_t)props[i+1]; >> + break; >> + } >> + } >> + if (!va_display) { >> + av_log(hwdev, AV_LOG_ERROR, "Media sharing must be " >> + "enabled on context creation to use QSV to " >> + "OpenCL mapping.\n"); >> + goto no_qsv; >> + } >> + if (!vaDisplayIsValid(va_display)) { >> + av_log(hwdev, AV_LOG_ERROR, "A valid VADisplay is " >> + "required on context creation to use QSV to " >> + "OpenCL mapping.\n"); >> + goto no_qsv; >> + } > > (Why can the display be invalid?) If the user made the context with CL_CONTEXT_VA_API_DISPLAY_INTEL to clCreateContext() but didn't get the argument right. Really just a debugging aid for user initialisation. >> + >> + CL_FUNC(clCreateFromVA_APIMediaSurfaceINTEL, >> + "Intel QSV to OpenCL mapping", no_qsv); >> + CL_FUNC(clEnqueueAcquireVA_APIMediaSurfacesINTEL, >> + "Intel QSV in OpenCL acquire", no_qsv); >> + CL_FUNC(clEnqueueReleaseVA_APIMediaSurfacesINTEL, >> + "Intel QSV in OpenCL release", no_qsv); >> + >> + ctx->qsv_enabled = 1; >> + if (0) { >> + no_qsv: >> + av_log(hwdev, AV_LOG_ERROR, "QSV to OpenCL mapping " >> + "not usable.\n"); >> + ctx->qsv_enabled = 0; >> + } >> + av_free(props); >> + } >> +#endif >> + >> #undef CL_FUNC >> >> return 0; >> @@ -360,6 +442,30 @@ static int opencl_device_derive(AVHWDeviceContext *ctx, >> err = opencl_device_init(ctx); >> break; >> #endif >> +#if HAVE_INTEL_OPENCL_QSV >> + // The generic code automatically attempts to derive from all >> + // ancestors of the given device, so we can ignore QSV devices >> + // here and just consider the inner device it was derived from. >> + case AV_HWDEVICE_TYPE_VAAPI: >> + { >> + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; >> + cl_context_properties props[5] = { >> + CL_CONTEXT_VA_API_DISPLAY_INTEL, >> + (intptr_t)src_hwctx->display, >> + CL_CONTEXT_INTEROP_USER_SYNC, >> + CL_FALSE, >> + 0, >> + }; >> + err = av_dict_set(&opts, "device_extensions", >> + "cl_intel_va_api_media_sharing", 0); >> + if (err >= 0) >> + err = opencl_device_create_internal(ctx, NULL, opts, >> + 0, props); >> + if (err >= 0) >> + err = opencl_device_init(ctx); >> + } >> + break; >> +#endif >> default: >> err = AVERROR(ENOSYS); >> break; >> @@ -1362,6 +1468,141 @@ fail: >> >> #endif >> >> +#if HAVE_INTEL_OPENCL_QSV >> + >> +typedef struct QSVtoOpenCLMapping { >> + int nb_planes; >> + cl_mem plane[MAX_PLANES]; >> +} QSVtoOpenCLMapping; >> + >> +static void opencl_unmap_from_qsv(AVHWFramesContext *dst_fc, >> + HWMapDescriptor *hwmap) >> +{ >> + QSVtoOpenCLMapping *mapping = hwmap->priv; >> + OpenCLDeviceContext *ctx = dst_fc->device_ctx->internal->priv; >> + cl_event event; >> + cl_int cle; >> + int i; >> + >> + av_log(dst_fc, AV_LOG_DEBUG, "Unmap QSV/VAAPI surface from OpenCL.\n"); >> + >> + cle = ctx->clEnqueueReleaseVA_APIMediaSurfacesINTEL( >> + ctx->command_queue, mapping->nb_planes, mapping->plane, >> + 0, NULL, &event); >> + if (cle != CL_SUCCESS) { >> + av_log(dst_fc, AV_LOG_ERROR, "Failed to release surface " >> + "handle: %d.\n", cle); >> + } >> + >> + opencl_wait_events(dst_fc, &event, 1); >> + >> + for (i = 0; i < mapping->nb_planes; i++) { >> + cle = clReleaseMemObject(mapping->plane[i]); >> + if (cle != CL_SUCCESS) { >> + av_log(dst_fc, AV_LOG_ERROR, "Failed to release CL " >> + "image of plane %d of QSV/VAAPI surface.\n", cle); >> + } >> + } >> + >> + av_free(mapping); >> +} >> + >> +static int opencl_map_from_qsv(AVHWFramesContext *dst_fc, AVFrame *dst, >> + const AVFrame *src, int flags) >> +{ >> + AVHWFramesContext *src_fc = >> + (AVHWFramesContext*)src->hw_frames_ctx->data; >> + AVOpenCLDeviceContext *dst_dev = dst_fc->device_ctx->hwctx; >> + OpenCLDeviceContext *ctx = dst_fc->device_ctx->internal->priv; >> + QSVtoOpenCLMapping *mapping = NULL; >> + VASurfaceID va_surface; >> + cl_mem_flags cl_flags; >> + cl_event event; >> + cl_int cle; >> + int err, i; >> + >> + if (src->format == AV_PIX_FMT_QSV) { >> + mfxFrameSurface1 *mfx_surface = (mfxFrameSurface1*)src->data[3]; >> + va_surface = *(VASurfaceID*)mfx_surface->Data.MemId; > > Again the somewhat duplicated and "unportable" MemId access. This is in a VAAPI-only function, in spite of the name. I think I need to rename some of this stuff to be clearer. This is really Intel proprietary VAAPI only (and usable as QSV-wrapping-VAAPI), and DXVA2 will need separate functions. So maybe: HAVE_INTEL_OPENCL_VAAPI -> HAVE_INTEL_BEIGNET_OPENCL_VAAPI HAVE_INTEL_OPENCL_QSV -> HAVE_INTEL_MEDIA_OPENCL_VAAPI then also HAVE_OPENCL_DXVA2 (which is a standard extension, so not INTEL). >> + } else if (src->format == AV_PIX_FMT_VAAPI) { >> + va_surface = (VASurfaceID)(uintptr_t)src->data[3]; >> + } else { >> + return AVERROR(ENOSYS); >> + } >> + >> + if (flags & (AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_WRITE)) >> + cl_flags = CL_MEM_READ_WRITE; >> + else if (flags & AV_HWFRAME_MAP_READ) >> + cl_flags = CL_MEM_READ_ONLY; >> + else if (flags & AV_HWFRAME_MAP_WRITE) >> + cl_flags = CL_MEM_WRITE_ONLY; >> + else >> + return AVERROR(EINVAL); > > We don't have this flag-mapping somewhere else already? The similar use in opencl_map_frame() is different, because it allows more flag combinations. >> + >> + av_log(src_fc, AV_LOG_DEBUG, "Map QSV/VAAPI surface %#x to " >> + "OpenCL.\n", va_surface); >> + >> + mapping = av_mallocz(sizeof(*mapping)); >> + if (!mapping) { >> + err = AVERROR(ENOMEM); >> + goto fail; >> + } >> + >> + // The cl_intel_va_api_media_sharing extension only supports NV12 >> + // surfaces, so for now there are always exactly two planes. >> + mapping->nb_planes = 2; >> + >> + for (i = 0; i < mapping->nb_planes; i++) { >> + mapping->plane[i] = ctx->clCreateFromVA_APIMediaSurfaceINTEL( >> + dst_dev->context, cl_flags, &va_surface, i, &cle); >> + if (!mapping->plane[i]) { >> + av_log(dst_fc, AV_LOG_ERROR, "Failed to create CL " >> + "image from plane %d of QSV/VAAPI surface " >> + "%#x: %d.\n", i, va_surface, cle); >> + err = AVERROR(EIO); >> + goto fail; >> + } >> + >> + dst->data[i] = (uint8_t*)mapping->plane[i]; >> + dst->linesize[i] = src_fc->width; >> + } >> + >> + cle = ctx->clEnqueueAcquireVA_APIMediaSurfacesINTEL( >> + ctx->command_queue, mapping->nb_planes, mapping->plane, >> + 0, NULL, &event); >> + if (cle != CL_SUCCESS) { >> + av_log(dst_fc, AV_LOG_ERROR, "Failed to acquire surface " >> + "handle: %d.\n", cle); >> + err = AVERROR(EIO); >> + goto fail; >> + } >> + >> + err = opencl_wait_events(dst_fc, &event, 1); >> + if (err < 0) >> + goto fail; >> + >> + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, >> + &opencl_unmap_from_qsv, mapping); >> + if (err < 0) >> + goto fail; >> + >> + dst->width = src->width; >> + dst->height = src->height; >> + >> + return 0; >> + >> +fail: >> + if (mapping) { >> + for (i = 0; i < mapping->nb_planes; i++) >> + if (mapping->plane[i]) >> + clReleaseMemObject(mapping->plane[i]); >> + } >> + av_freep(&mapping); >> + return err; >> +} >> + >> +#endif >> + >> >> static int opencl_map_to(AVHWFramesContext *hwfc, AVFrame *dst, >> const AVFrame *src, int flags) >> @@ -1372,6 +1613,11 @@ static int opencl_map_to(AVHWFramesContext *hwfc, >> AVFrame *dst, >> case AV_PIX_FMT_VAAPI: >> return opencl_map_from_vaapi(hwfc, dst, src, flags); >> #endif >> +#if HAVE_INTEL_OPENCL_QSV >> + case AV_PIX_FMT_QSV: >> + case AV_PIX_FMT_VAAPI: >> + return opencl_map_from_qsv(hwfc, dst, src, flags); >> +#endif >> default: >> return AVERROR(ENOSYS); >> } > > Can't comment much. The ifdeffery and complexity of hwcontext_opencl > seems a bit worrying, maybe calling for a more structured approach. It could be split across multple files, but there is a lot of shared code so quite a bit of stuff would have to end up in private headers, which didn't seem very nice. Or did you have something else in mind? _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
