On 17/02/16 21:14, Anton Khirnov wrote:
> Quoting Mark Thompson (2016-02-16 00:24:31)
>> diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
>> ...
>> +static AVBufferRef *vaapi_pool_alloc(void *opaque, int size)
>> +{
>> + AVHWFramesContext *hwfc = opaque;
>> + VAAPIFramesContext *ctx = hwfc->internal->priv;
>> + AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
>> + AVVAAPIFramesContext *avfc = hwfc->hwctx;
>> + VASurfaceID surface_id;
>> + VAStatus vas;
>> + AVBufferRef *ref;
>> +
>> + vas = vaCreateSurfaces(hwctx->display, ctx->rt_format,
>> + hwfc->width, hwfc->height,
>> + &surface_id, 1,
>> + ctx->attributes, ctx->attribute_count);
>> + if (vas != VA_STATUS_SUCCESS) {
>> + av_log(hwfc, AV_LOG_ERROR, "Failed to create surface: "
>> + "%d (%s).\n", vas, vaErrorStr(vas));
>> + return NULL;
>> + }
>> + av_log(hwfc, AV_LOG_DEBUG, "Created surface %#x.\n", surface_id);
>> +
>> + ref = av_buffer_create((uint8_t*)(uintptr_t)surface_id,
>> + sizeof(surface_id), &vaapi_buffer_free,
>> + hwfc, AV_BUFFER_FLAG_READONLY);
>> + if (!ref) {
>> + vaDestroySurfaces(hwctx->display, &surface_id, 1);
>> + return NULL;
>> + }
>> +
>> + if (hwfc->initial_pool_size > 0) {
>> + av_assert0(avfc->surface_count < hwfc->initial_pool_size);
>> + avfc->surface_ids[avfc->surface_count] = surface_id;
>> + }
>> + ++avfc->surface_count;
>> + if (avfc->surface_count == 1) {
>> + // Test whether vaDeriveImage() works for this type of surface.
>> + VAImageFormat *expected_format;
>> + VAImage test_image;
>> + int err;
>> +
>> + ctx->derive_works = 0;
>
> I see potential troubles with modifying the context here -- both the
> private one and (especially) the public one.
> This API should be thread-safe, since there are many framesctx refs
> lying around and you don't know when they will be used. If necessary, we
> can add locking to it, but it's better to avoid that if possible. And i
> think it is possible here -- testing whether derive works can be done at
> frames ctx (or even device ctx) init, right?
Yes. (Given the format, so frames ctx init.)
> Modifying the surface count is even more tricky, since the field is
> public and can be accessed concurrently by the user. We can add some
> kind of a locking API, but I still feel that it will be much simpler
> to just preallocate the entire array in framesctx init and never touch
> it after that.
> And if the caller really wants a dynamic pool, that is still achievable
> if he supplies his own custom AVBufferPool.
Ok, sure. I'm have no use for dynamic pools given the silly render target
constraint, so removing them is not a problem.
Was there an answer to the DXVAsomething surface allocation question, which
might influence ordering in this area?
>> +
>> + err = vaapi_get_image_format(hwfc->device_ctx,
>> + hwfc->sw_format, &expected_format);
>> + if (err == 0) {
>> + vas = vaDeriveImage(hwctx->display, surface_id, &test_image);
>> + if (vas == VA_STATUS_SUCCESS) {
>> + if (expected_format->fourcc == test_image.format.fourcc) {
>> + av_log(hwfc, AV_LOG_DEBUG, "Direct mapping
>> possible.\n");
>> + ctx->derive_works = 1;
>> + } else {
>> + av_log(hwfc, AV_LOG_DEBUG, "Direct mapping disabled: "
>> + "derived image format %08x does not match "
>> + "expected format %08x.\n",
>> + expected_format->fourcc,
>> test_image.format.fourcc);
>> + }
>> + vaDestroyImage(hwctx->display, test_image.image_id);
>> + } else {
>> + av_log(hwfc, AV_LOG_DEBUG, "Direct mapping disabled: "
>> + "deriving image does not work: "
>> + "%d (%s).\n", vas, vaErrorStr(vas));
>> + }
>> + } else {
>> + av_log(hwfc, AV_LOG_DEBUG, "Direct mapping disabled: "
>> + "image format is not supported.\n");
>> + }
>> + }
>> +
>> + return ref;
>> +}
>> ...
>> +static int vaapi_map_frame(AVHWFramesContext *hwfc,
>> + AVFrame *dst, const AVFrame *src, int flags)
>> +{
>> + AVVAAPIDeviceContext *hwctx = hwfc->device_ctx->hwctx;
>> + VAAPIFramesContext *ctx = hwfc->internal->priv;
>> + VASurfaceID surface_id;
>> + VAImageFormat *image_format;
>> + VAAPISurfaceMap *map;
>> + VAStatus vas;
>> + void *address = 0;
>> + int err, i;
>> +
>> + surface_id = (VASurfaceID)(uintptr_t)src->data[3];
>> + av_log(hwfc, AV_LOG_DEBUG, "Map surface %#x.\n", surface_id);
>> +
>> + if (!ctx->derive_works && (flags & VAAPI_MAP_DIRECT)) {
>> + // Requested direct mapping but it is not possible.
>> + return AVERROR(EINVAL);
>> + }
>> + if (dst->format == AV_PIX_FMT_NONE)
>> + dst->format = hwfc->sw_format;
>> + if (dst->format != hwfc->sw_format && (flags & VAAPI_MAP_DIRECT)) {
>> + // Requested direct mapping but the formats do not match.
>> + return AVERROR(EINVAL);
>> + }
>> +
>> + err = vaapi_get_image_format(hwfc->device_ctx, dst->format,
>> &image_format);
>> + if (err < 0) {
>> + // Requested format is not a valid output format.
>> + return AVERROR(EINVAL);
>> + }
>> +
>> + map = av_malloc(sizeof(VAAPISurfaceMap));
>> + if (!map)
>> + return AVERROR(ENOMEM);
>> +
>> + map->source = src;
>> + map->flags = flags;
>> + map->image.image_id = VA_INVALID_ID;
>> +
>> + vas = vaSyncSurface(hwctx->display, surface_id);
>> + if (vas != VA_STATUS_SUCCESS) {
>> + av_log(hwfc, AV_LOG_ERROR, "Failed to sync surface "
>> + "%#x: %d (%s).\n", surface_id, vas, vaErrorStr(vas));
>> + err = AVERROR(EIO);
>> + goto fail;
>> + }
>> +
>> + // On current Intel drivers, derive gives you memory which is very slow
>> + // to read normally. Assume for now that a user who asks for read
>> access
>> + // but doesn't explicitly request direct mapping is not going to be
>> + // optimised for such, so don't use derive in that case.
>
> Adding a copy implementation that uses non-temporal loads should fix
> that, right? (not implying you have to implement it right now)
Yes.
> Other than that, this looks very nice. A bunch of other cosmetic notes
> (which you can ignore):
> - there's av_malloc_array() for allocating arrays which checks that the
> multiply doesn't overflow
Sure.
> - there's a bunch of unnecessary NULL checks before free, we ususally
> don't do that since av_free(NULL) is valid
Already removed in reponse to review from Diego.
> - a variable storing number of foos is usually named 'nb_foo',
> especially in the public API it's better to have that consistent
Ok, I'll change this to be consistent.
> - it's usually better to use sizeof(var) instead of sizeof(type)
Also already done.
How opposed are you to a bit of VAAPI-specific API here? Having got a bit
further, the one specific which would be useful to avoid annoying duplication
is the enumeration of formats available in a given configuration (which can
then be passed as sw_format when making a hw_frames_ctx). It's currently done
in the device setup to get transfer formats, then identically in the
vaapi_scale filter, then again in encoder setup.
In any case, I will probably post a new version with at least the filter code
added tomorrow.
Thanks,
- Mark
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel