On 04/05/17 07:44, wm4 wrote:
> To be used with the new d3d11 hwaccel decode API.
>
> With the new hwaccel API, we don't want surfaces to depend on the
> decoder (other than the required dimension and format). The old D3D11VA
> pixfmt uses ID3D11VideoDecoderOutputView pointers, which include the
> decoder configuration, and thus is incompatible with the new hwaccel
> API. This patch introduces AV_PIX_FMT_D3D11, which uses ID3D11Texture2D
> and an index. It's simpler and compatible with the new hwaccel API.
>
> The introduced hwcontext supports only the new pixfmt.
>
> Significantly based on work by Steve Lhomme <[email protected]>, but with
> heavy changes/rewrites.
> ---
> Somewhat sketchy: if initial_pool_size is set, the pool is assumed to
> be static.
> ---
> configure | 6 +
> doc/APIchanges | 3 +
> libavutil/Makefile | 3 +
> libavutil/hwcontext.c | 4 +
> libavutil/hwcontext.h | 1 +
> libavutil/hwcontext_d3d11va.c | 488 +++++++++++++++++++++++++++++++++++++++++
> libavutil/hwcontext_d3d11va.h | 158 +++++++++++++
> libavutil/hwcontext_internal.h | 1 +
> libavutil/pixdesc.c | 4 +
> libavutil/pixfmt.h | 4 +-
> libavutil/version.h | 4 +-
> 11 files changed, 673 insertions(+), 3 deletions(-)
> create mode 100644 libavutil/hwcontext_d3d11va.c
> create mode 100644 libavutil/hwcontext_d3d11va.h
>
> ...
> +
> +static AVBufferRef *d3d11va_pool_alloc(void *opaque, int size)
> +{
> + AVHWFramesContext *ctx = (AVHWFramesContext*)opaque;
> + D3D11VAFramesContext *s = ctx->internal->priv;
> + AVD3D11VAFramesContext *hwctx = ctx->hwctx;
> + AVD3D11FrameDescriptor *desc;
> + D3D11_TEXTURE2D_DESC texDesc;
> + AVBufferRef *buf;
desc and buf are unused.
> +
> + if (!hwctx->texture)
> + return d3d11va_alloc_single(ctx);
> +
> + ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc);
> +
> + if (s->nb_surfaces_used >= texDesc.ArraySize) {
> + av_log(ctx, AV_LOG_ERROR, "Static surface pool size exceeded.\n");
> + return NULL;
> + }
> +
> + ID3D11Texture2D_AddRef(hwctx->texture);
> + return wrap_texture_buf(hwctx->texture, s->nb_surfaces_used++);
> +}
> +
> ...
> +
> +static int d3d11va_transfer_data(AVHWFramesContext *ctx, AVFrame *dst,
> + const AVFrame *src)
> +{
> + AVD3D11VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
> + D3D11VAFramesContext *s = ctx->internal->priv;
> + int download = !!src->hw_frames_ctx;
This isn't correct: src could have hw_frames_ctx set because it is a mapped frame
while we are really uploading to dst. I don't think you can express this with only
one function (see the sketch below).
(Not sure there is any case where you would ever want to do that, though.)
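A rough sketch of what I mean (untested; the internal helper name is made up) -
keep one worker with an explicit direction flag and wire it to both transfer
callbacks in HWContextType, instead of guessing the direction from hw_frames_ctx:

static int d3d11va_transfer_data_internal(AVHWFramesContext *ctx, AVFrame *dst,
                                          const AVFrame *src, int download)
{
    /* ...current body of d3d11va_transfer_data, with "download" passed in... */
    return 0;
}

static int d3d11va_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                      const AVFrame *src)
{
    return d3d11va_transfer_data_internal(ctx, dst, src, 1);
}

static int d3d11va_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                    const AVFrame *src)
{
    return d3d11va_transfer_data_internal(ctx, dst, src, 0);
}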
> + const AVFrame *frame = download ? src : dst;
> + // (The interface types are compatible.)
> + ID3D11Resource *texture = (ID3D11Resource *)(ID3D11Texture2D *)frame->data[0];
> + int index = (intptr_t)frame->data[1];
> + ID3D11Resource *staging = (ID3D11Resource *)s->staging_texture;
> + int w = FFMIN(dst->width, src->width);
> + int h = FFMIN(dst->height, src->height);
> + uint8_t *map_data[4];
> + int map_linesize[4];
> + D3D11_TEXTURE2D_DESC desc;
> + D3D11_MAPPED_SUBRESOURCE map;
> + HRESULT hr;
> +
> + device_hwctx->lock(device_hwctx->lock_ctx);
> +
> + ID3D11Texture2D_GetDesc(s->staging_texture, &desc);
> +
> + if (download) {
> + ID3D11DeviceContext_CopySubresourceRegion(device_hwctx->device_context,
> + staging, 0, 0, 0, 0,
> + texture, index, NULL);
> +
> + hr = ID3D11DeviceContext_Map(device_hwctx->device_context,
> + staging, 0, D3D11_MAP_READ, 0, &map);
> + if (FAILED(hr))
> + goto map_failed;
> +
> + fill_texture_ptrs(map_data, map_linesize, ctx, &desc, &map);
> +
> + av_image_copy(dst->data, dst->linesize, map_data, map_linesize,
> + ctx->sw_format, w, h);
Is the staging texture somehow magic such that it isn't going to be uncached
memory or anything tricky like that?
> +
> + ID3D11DeviceContext_Unmap(device_hwctx->device_context, staging, 0);
> + } else {
> + hr = ID3D11DeviceContext_Map(device_hwctx->device_context,
> + staging, 0, D3D11_MAP_WRITE, 0, &map);
> + if (FAILED(hr))
> + goto map_failed;
> +
> + fill_texture_ptrs(map_data, map_linesize, ctx, &desc, &map);
> +
> + av_image_copy(map_data, map_linesize, src->data, src->linesize,
> + ctx->sw_format, w, h);
> +
> + ID3D11DeviceContext_Unmap(device_hwctx->device_context, staging, 0);
> +
> + ID3D11DeviceContext_CopySubresourceRegion(device_hwctx->device_context,
> + texture, index, 0, 0, 0,
> + staging, 0, NULL);
> + }
> +
> + device_hwctx->unlock(device_hwctx->lock_ctx);
> + return 0;
> +
> +map_failed:
> + av_log(ctx, AV_LOG_ERROR, "Unable to lock D3D11VA surface (%lx)\n",
> +        (long)hr);
> + device_hwctx->unlock(device_hwctx->lock_ctx);
> + return AVERROR_UNKNOWN;
> +}
It seems like it would be nicer to implement the mapping once (a small helper, or
the actual frame map callbacks) and call it here, rather than repeating the Map /
fill_texture_ptrs logic in both branches of this function.
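Purely illustrative (the helper name is made up, and this only covers the staging
Map + pointer setup):

static int d3d11va_map_staging(AVHWFramesContext *ctx, D3D11_MAP map_type,
                               uint8_t *map_data[4], int map_linesize[4])
{
    AVD3D11VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    D3D11VAFramesContext   *s            = ctx->internal->priv;
    D3D11_TEXTURE2D_DESC desc;
    D3D11_MAPPED_SUBRESOURCE map;
    HRESULT hr;

    ID3D11Texture2D_GetDesc(s->staging_texture, &desc);

    hr = ID3D11DeviceContext_Map(device_hwctx->device_context,
                                 (ID3D11Resource *)s->staging_texture, 0,
                                 map_type, 0, &map);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Unable to lock D3D11VA surface (%lx)\n",
               (long)hr);
        return AVERROR_UNKNOWN;
    }

    fill_texture_ptrs(map_data, map_linesize, ctx, &desc, &map);
    return 0;
}

Then each branch is just d3d11va_map_staging() + av_image_copy() + Unmap.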
> +/**
> + * @file
> + * An API-specific header for AV_HWDEVICE_TYPE_D3D11VA.
> + *
> + * The default pool implementation will be fixed-size if initial_pool_size is
> + * set (and allocate elements from an array texture). Otherwise it will allocate
> + * individual textures. Be aware that decoding requires an array texture.
> + */
> +
> +#include <d3d11.h>
> +
> +/**
> + * This struct is allocated as AVHWDeviceContext.hwctx
> + */
> +typedef struct AVD3D11VADeviceContext {
> + /**
> + * Device used for texture creation and access. This can also be used to
> + * set the libavcodec decoding device.
> + *
> + * Must be set by the user. This is the only mandatory field - the other
> + * device context fields are set from this and are available for convenience.
> + * FIXME: user has to set dev_ctx_mutex, even if it's unused
dev_ctx_mutex doesn't exist anymore.
> + *
> + * Deallocating the AVHWDeviceContext will always release this interface,
> + * and it does not matter whether it was user-allocated.
> + */
> + ID3D11Device *device;
> +
> + /**
> + * If unset, this will be set from the device field on init.
> + *
> + * Deallocating the AVHWDeviceContext will always release this interface,
> + * and it does not matter whether it was user-allocated.
> + */
> + ID3D11DeviceContext *device_context;
> +
> + /**
> + * If unset, this will be set from the device field on init.
> + *
> + * Deallocating the AVHWDeviceContext will always release this interface,
> + * and it does not matter whether it was user-allocated.
> + */
> + ID3D11VideoDevice *video_device;
> +
> + /**
> + * If unset, this will be set from the device_context field on init.
> + *
> + * Deallocating the AVHWDeviceContext will always release this interface,
> + * and it does not matter whether it was user-allocated.
> + */
> + ID3D11VideoContext *video_context;
> +
> + /**
> + * Callbacks for locking. They protect accesses to most D3D method calls.
> + * They also protect access to the internal staging texture (for
> + * av_hwframe_transfer_data() calls). They do NOT protect access to
> + * hwcontext or decoder state in general.
> + *
> + * If unset on init, the hwcontext implementation will set them to use an
> + * internal mutex.
> + *
> + * If set, the underlying lock must be recursive. lock_ctx is for free use
> + * by the locking implementation.
> + */
> + void (*lock)(void *lock_ctx);
> + void (*unlock)(void *lock_ctx);
> + void *lock_ctx;
> +} AVD3D11VADeviceContext;
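For what it's worth, user-side setup with explicit lock callbacks would presumably
look roughly like this (untested sketch; create_device_ref/my_lock/my_unlock are
invented names, and a CRITICAL_SECTION is used because it is recursive, as required
above):

#define COBJMACROS
#include <windows.h>
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_d3d11va.h>

static CRITICAL_SECTION d3d_lock;

static void my_lock(void *ctx)   { EnterCriticalSection(ctx); }
static void my_unlock(void *ctx) { LeaveCriticalSection(ctx); }

/* device: an ID3D11Device the application already created */
static AVBufferRef *create_device_ref(ID3D11Device *device)
{
    AVBufferRef *ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA);
    AVHWDeviceContext *ctx;
    AVD3D11VADeviceContext *hwctx;

    if (!ref)
        return NULL;
    ctx   = (AVHWDeviceContext *)ref->data;
    hwctx = ctx->hwctx;

    InitializeCriticalSection(&d3d_lock);

    ID3D11Device_AddRef(device);    /* freeing the hwdevice ctx will Release() it */
    hwctx->device   = device;       /* the only mandatory field */
    hwctx->lock     = my_lock;      /* optional: if unset, an internal mutex is used */
    hwctx->unlock   = my_unlock;
    hwctx->lock_ctx = &d3d_lock;

    if (av_hwdevice_ctx_init(ref) < 0) {
        av_buffer_unref(&ref);
        return NULL;
    }
    return ref;
}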
> +
> +/**
> + * D3D11 frame descriptor for pool allocation.
> + *
> + * In user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
> + * with the data pointer pointing at an object of this type describing the
> + * planes of the frame.
> + */
> +typedef struct AVD3D11FrameDescriptor {
> + /**
> + * The texture in which the frame is located in. The reference count is
> + * managed by the AVBufferRef, and destroying the reference will release
> + * the interface.
> + *
> + * Normally stored in AVFrame.data[0].
> + */
> + ID3D11Texture2D *texture;
> + /**
> + * The index into the array texture element representing the frame, or 0
> + * if the texture is not an array texture.
> + *
> + * Normally stored in AVFrame.data[1] (casted from intptr_t).
> + */
> + intptr_t index;
> +} AVD3D11FrameDescriptor;
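Just to check my reading of these semantics, a user-allocated pool entry would
presumably be built roughly like this (untested sketch; wrap_slice/free_desc are
invented names):

static void free_desc(void *opaque, uint8_t *data)
{
    AVD3D11FrameDescriptor *desc = (AVD3D11FrameDescriptor *)data;
    ID3D11Texture2D_Release(desc->texture);
    av_free(desc);
}

/* Wrap one slice of an array texture as the kind of AVBufferRef the pool
 * must return: data points at an AVD3D11FrameDescriptor for the frame. */
static AVBufferRef *wrap_slice(ID3D11Texture2D *texture, intptr_t index)
{
    AVD3D11FrameDescriptor *desc = av_mallocz(sizeof(*desc));
    AVBufferRef *buf;

    if (!desc)
        return NULL;

    ID3D11Texture2D_AddRef(texture);   /* reference owned by the buffer */
    desc->texture = texture;
    desc->index   = index;

    buf = av_buffer_create((uint8_t *)desc, sizeof(*desc), free_desc, NULL, 0);
    if (!buf) {
        ID3D11Texture2D_Release(texture);
        av_free(desc);
    }
    return buf;
}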
> +
> +/**
> + * This struct is allocated as AVHWFramesContext.hwctx
> + */
> +typedef struct AVD3D11VAFramesContext {
> + /**
> + * The canonical texture used for pool allocation. If this is set to NULL
> + * on init, the hwframes implementation will allocate and set an array
> + * texture if initial_pool_size > 0.
> + *
> + * The only situation when the API user should set this is:
> + * - the user wants to do manual pool allocation (setting
> + * AVHWFramesContext.pool), instead of letting AVHWFramesContext
> + * allocate the pool
> + * - of an array texture
> + * - and wants it to use it for decoding
> + * - this has to be done before calling av_hwframe_ctx_init()
> + *
> + * Deallocating the AVHWFramesContext will always release this interface,
> + * and it does not matter whether it was user-allocated.
> + *
> + * This is in particular used by the libavcodec D3D11VA hwaccel, which
> + * requires a single array texture. It will create ID3D11VideoDecoderOutputView
> + * objects for each array texture element on decoder initialization.
> + */
> + ID3D11Texture2D *texture;
> +
> + /**
> + * D3D11_TEXTURE2D_DESC.BindFlags used for texture creation. The user must
> + * at least set D3D11_BIND_DECODER if the frames context is to be used for
> + * video decoding.
> + * This field is ignored/invalid if a user-allocated texture is provided.
> + */
> + UINT BindFlags;
> +
> + /**
> + * D3D11_TEXTURE2D_DESC.MiscFlags used for texture creation.
> + * This field is ignored/invalid if a user-allocated texture is provided.
> + */
> + UINT MiscFlags;
> +} AVD3D11VAFramesContext;
:) to this definition. Much more sane than the weird decoder interdependency
of previous ideas.
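And to confirm I read the intended usage right: letting the hwframes code allocate
the decoder array texture would look roughly like this, I assume (sketch only; the
function name is made up, NV12 is just an example sw_format, and error handling is
trimmed):

/* device_ref: an AVBufferRef holding the AVHWDeviceContext */
static AVBufferRef *alloc_decoder_frames(AVBufferRef *device_ref,
                                         int width, int height, int nb_surfaces)
{
    AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ref);
    AVHWFramesContext *frames;
    AVD3D11VAFramesContext *hwctx;

    if (!frames_ref)
        return NULL;
    frames = (AVHWFramesContext *)frames_ref->data;
    hwctx  = frames->hwctx;

    frames->format    = AV_PIX_FMT_D3D11;
    frames->sw_format = AV_PIX_FMT_NV12;
    frames->width     = width;
    frames->height    = height;
    /* > 0 => fixed-size pool backed by a single array texture, as decoding needs */
    frames->initial_pool_size = nb_surfaces;

    hwctx->BindFlags = D3D11_BIND_DECODER;

    if (av_hwframe_ctx_init(frames_ref) < 0)
        av_buffer_unref(&frames_ref);
    return frames_ref;
}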
> diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
> index fc1969ef12..d55f4d5cdc 100644
> --- a/libavutil/pixfmt.h
> +++ b/libavutil/pixfmt.h
> @@ -205,7 +205,7 @@ enum AVPixelFormat {
> */
> AV_PIX_FMT_MMAL,
>
> - AV_PIX_FMT_D3D11VA_VLD, ///< HW decoding through Direct3D11, Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
> + AV_PIX_FMT_D3D11VA_VLD, ///< HW decoding through Direct3D11 via old API, Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
>
> /**
> * HW acceleration through CUDA. data[i] contain CUdeviceptr pointers
> @@ -237,6 +237,8 @@ enum AVPixelFormat {
> AV_PIX_FMT_GBRAP10BE, ///< planar GBR 4:4:4:4 40bpp, big-endian
> AV_PIX_FMT_GBRAP10LE, ///< planar GBR 4:4:4:4 40bpp, little-endian
>
> + AV_PIX_FMT_D3D11, ///< HW decoding through Direct3D11 via new API, Picture.data[0] contains a ID3D11Texture2D pointer, and data[1] contains the texture array index of the frame as intptr_t if the ID3D11Texture2D is an array texture (or 0 if it's a normal texture)
The old format was specifically for decoding, but the new one is more general
than that.
Maybe just "Hardware surfaces for Direct3D11"?
> +
> + AV_PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
> };
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel