d3d: add support for new hwaccel API

wm4 Fri, 28 Apr 2017 13:30:27 -0700

Yes, this completely changes how the D3D11 hwcontext works.

There are still some things to fix:
- we probably need to create yet another AVHWAccel struct for each codec
- messy initialization (setting avctx.pix_fmt in hwaccel, get_format)
- make me fix FIXMEs
- GUID nonsense (getting multiple or missing definitions?)
- make sure cleanup is correct
---
 avtools/Makefile              |   1 -
 avtools/avconv_d3d11va.c      | 213 --------------
 avtools/avconv_opt.c          |   3 +-
 libavcodec/decode.c           |   4 +-
 libavcodec/dxva2.c            | 662 +++++++++++++++++++++++++++++++++++++++++-
 libavcodec/dxva2_h264.c       |  23 +-
 libavcodec/dxva2_hevc.c       |  16 +-
 libavcodec/dxva2_internal.h   |  60 +++-
 libavcodec/dxva2_mpeg2.c      |  16 +-
 libavcodec/dxva2_vc1.c        |  22 +-
 libavcodec/h264_slice.c       |   3 +-
 libavutil/hwcontext.c         |   1 +
 libavutil/hwcontext_d3d11va.c | 124 ++++----
 libavutil/hwcontext_d3d11va.h |  61 +++-
 libavutil/hwcontext_dxva2.h   |   3 +
 libavutil/pixdesc.c           |   4 +
 libavutil/pixfmt.h            |   4 +-
 17 files changed, 871 insertions(+), 349 deletions(-)
 delete mode 100644 avtools/avconv_d3d11va.c


diff --git a/avtools/Makefile b/avtools/Makefile
index 4ae0fa78f2..5d374a9360 100644
--- a/avtools/Makefile
+++ b/avtools/Makefile
@@ -13,7 +13,6 @@ OBJS-avconv                   += avtools/avconv_opt.o 
avtools/avconv_filter.o \
 OBJS-avconv-$(CONFIG_LIBMFX)  += avtools/avconv_qsv.o
 OBJS-avconv-$(CONFIG_VDA)     += avtools/avconv_vda.o
 OBJS-avconv-$(HAVE_DXVA2_LIB) += avtools/avconv_dxva2.o avtools/avconv_guid.o
-OBJS-avconv-$(HAVE_D3D11VA_LIB) += avtools/avconv_d3d11va.o 
avtools/avconv_guid.o
 
 define DOAVTOOL
 OBJS-$(1) += avtools/cmdutils.o avtools/$(1).o $(OBJS-$(1)-yes)
diff --git a/avtools/avconv_d3d11va.c b/avtools/avconv_d3d11va.c
deleted file mode 100644
index d7ec558a9c..0000000000
--- a/avtools/avconv_d3d11va.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <windows.h>
-
-#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
-#undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
-#endif
-#define COBJMACROS
-
-#include <stdint.h>
-
-#include <d3d11.h>
-
-#include "avconv.h"
-
-#include "libavcodec/d3d11va.h"
-
-#include "libavutil/avassert.h"
-#include "libavutil/buffer.h"
-#include "libavutil/frame.h"
-#include "libavutil/imgutils.h"
-#include "libavutil/pixfmt.h"
-
-#include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_d3d11va.h"
-
-typedef struct D3D11VAContext {
-    D3D11_VIDEO_DECODER_CONFIG   decoder_config;
-
-    AVFrame                     *tmp_frame;
-
-    AVBufferRef                 *hw_device_ctx;
-    AVBufferRef                 *hw_frames_ctx;
-} D3D11VAContext;
-
-typedef D3D11_VIDEO_DECODER_CONFIG DXVA_DECODER_CONFIG;
-typedef DXGI_FORMAT                DXVA_SURFACE_FORMAT;
-typedef D3D11VAContext             DXVA_CONTEXT;
-typedef AVD3D11VAContext           DXVA_AV_CONTEXT;
-typedef ID3D11VideoDevice          *DXVA_DECODER_SERVICE;
-#include "avconv_dxva_template.c"
-
-static int d3d11va_get_decoder_configuration(AVCodecContext *s,
-                                             const D3D11_VIDEO_DECODER_DESC 
*desc,
-                                             DXVA_DECODER_CONFIG *config)
-{
-    InputStream  *ist = s->opaque;
-    int loglevel = (ist->hwaccel_id == HWACCEL_AUTO) ? AV_LOG_VERBOSE : 
AV_LOG_ERROR;
-    unsigned cfg_count = 0;
-    DXVA_DECODER_CONFIG *cfg_list = NULL;
-    HRESULT hr;
-    int i, ret;
-
-    DXVA_CONTEXT *ctx                    = ist->hwaccel_ctx;
-    AVHWDeviceContext    *device_ctx     = 
(AVHWDeviceContext*)ctx->hw_device_ctx->data;
-    AVD3D11VADeviceContext *device_hwctx = device_ctx->hwctx;
-
-    hr = 
ID3D11VideoDevice_GetVideoDecoderConfigCount(device_hwctx->video_device, desc, 
&cfg_count);
-    if (FAILED(hr)) {
-        av_log(NULL, loglevel, "Unable to retrieve decoder configurations\n");
-        return AVERROR(EINVAL);
-    }
-
-    cfg_list = av_malloc(cfg_count * sizeof(DXVA_DECODER_CONFIG));
-    if (cfg_list == NULL)
-        return AVERROR(ENOMEM);
-    for (i = 0; i < cfg_count; i++) {
-        hr = 
ID3D11VideoDevice_GetVideoDecoderConfig(device_hwctx->video_device, desc, i, 
&cfg_list[i]);
-        if (FAILED(hr)) {
-            av_log(NULL, loglevel, "Unable to retrieve decoder configurations. 
(hr=0x%lX)\n", hr);
-            free(cfg_list);
-            return AVERROR(EINVAL);
-        }
-    }
-
-    ret = dxva_get_decoder_configuration(s, cfg_list, cfg_count, config);
-    av_free(cfg_list);
-    return ret;
-}
-
-static int d3d11va_validate_output(DXVA_DECODER_SERVICE service, GUID guid, 
DXVA_SURFACE_FORMAT surface_format)
-{
-    HRESULT hr;
-    BOOL is_supported = FALSE;
-    hr = ID3D11VideoDevice_CheckVideoDecoderFormat(service, &guid, 
surface_format, &is_supported);
-    return SUCCEEDED(hr) && is_supported;
-}
-
-static int d3d11va_create_decoder(AVCodecContext *s)
-{
-    InputStream  *ist = s->opaque;
-    int loglevel = (ist->hwaccel_id == HWACCEL_AUTO) ? AV_LOG_VERBOSE : 
AV_LOG_ERROR;
-    DXVA_CONTEXT *ctx = ist->hwaccel_ctx;
-    DXVA_AV_CONTEXT *dxva_ctx = s->hwaccel_context;
-    GUID *guid_list;
-    unsigned guid_count, i;
-    GUID decoder_guid;
-    const DXVA_SURFACE_FORMAT surface_format = s->sw_pix_fmt == 
AV_PIX_FMT_YUV420P10 ?
-                                               DXGI_FORMAT_P010 : 
DXGI_FORMAT_NV12;
-    D3D11_VIDEO_DECODER_DESC desc = { 0 };
-    DXVA_DECODER_CONFIG config;
-    HRESULT hr;
-    int ret;
-
-    AVD3D11VAFramesContext *frames_hwctx;
-    AVHWFramesContext *frames_ctx;
-
-    AVHWDeviceContext    *device_ctx;
-    AVD3D11VADeviceContext *device_hwctx;
-    device_ctx = (AVHWDeviceContext*)ctx->hw_device_ctx->data;
-    device_hwctx = device_ctx->hwctx;
-
-    guid_count = 
ID3D11VideoDevice_GetVideoDecoderProfileCount(device_hwctx->video_device);
-    guid_list = av_malloc(sizeof(*guid_list) * guid_count);
-    if (guid_list==NULL) {
-        av_log(NULL, loglevel, "Failed to get the decoder GUIDs\n");
-        goto fail;
-    }
-    for (i=0; i<guid_count; i++) {
-        hr = 
ID3D11VideoDevice_GetVideoDecoderProfile(device_hwctx->video_device, i, 
&guid_list[i]);
-        if (FAILED(hr)) {
-            av_log(NULL, loglevel, "Failed to retrieve decoder GUID %d\n", i);
-            av_free(guid_list);
-            goto fail;
-        }
-    }
-
-    ret = dxva_get_decoder_guid(s, device_hwctx->video_device, surface_format, 
d3d11va_validate_output,
-                                guid_count, guid_list, &decoder_guid);
-    av_free(guid_list);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    desc.SampleWidth  = s->coded_width;
-    desc.SampleHeight = s->coded_height;
-    desc.OutputFormat = surface_format;
-    desc.Guid         = decoder_guid;
-
-    ret = d3d11va_get_decoder_configuration(s, &desc, &config);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx);
-    if (!ctx->hw_frames_ctx)
-        goto fail;
-    frames_ctx   = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
-    frames_hwctx = frames_ctx->hwctx;
-    frames_ctx->format = AV_PIX_FMT_D3D11VA_VLD;
-
-    dxva_adjust_decoder(s);
-
-    hr = ID3D11VideoDevice_CreateVideoDecoder(device_hwctx->video_device, 
&desc,
-                                              &config, 
&frames_hwctx->video_decoder);
-    if (FAILED(hr)) {
-        av_log(NULL, loglevel, "Failed to create D3D11VA video decoder\n");
-        goto fail;
-    }
-
-    ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
-    if (ret < 0) {
-        av_log(NULL, loglevel, "Failed to initialize the HW frames context\n");
-        goto fail;
-    }
-
-    ctx->decoder_config = config;
-
-    dxva_ctx->cfg           = &ctx->decoder_config;
-    dxva_ctx->surface       = frames_hwctx->surfaces;
-    dxva_ctx->surface_count = frames_hwctx->nb_surfaces;
-    dxva_ctx->decoder       = frames_hwctx->video_decoder;
-    dxva_ctx->video_context = device_hwctx->video_context;
-    dxva_ctx->context_mutex = device_hwctx->dev_ctx_mutex;
-
-    return 0;
-fail:
-    if (frames_hwctx->video_decoder)
-        ID3D11VideoDecoder_Release(frames_hwctx->video_decoder);
-    av_buffer_unref(&ctx->hw_frames_ctx);
-    return AVERROR(EINVAL);
-}
-
-int d3d11va_init(AVCodecContext *s)
-{
-    InputStream *ist = s->opaque;
-    int ret;
-
-    if (!ist->hwaccel_ctx) {
-        ret = dxva_alloc(s, AV_HWDEVICE_TYPE_D3D11VA);
-        if (ret < 0)
-            return ret;
-    }
-
-    return dxva_init(s, d3d11va_create_decoder);
-}
diff --git a/avtools/avconv_opt.c b/avtools/avconv_opt.c
index 84c817a8d2..c847d2d26e 100644
--- a/avtools/avconv_opt.c
+++ b/avtools/avconv_opt.c
@@ -61,7 +61,8 @@ const HWAccel hwaccels[] = {
       AV_HWDEVICE_TYPE_VDPAU },
 #endif
 #if HAVE_D3D11VA_LIB
-    { "d3d11va", d3d11va_init, HWACCEL_D3D11VA, AV_PIX_FMT_D3D11VA_VLD },
+    { "d3d11va", hwaccel_decode_init, HWACCEL_D3D11VA, AV_PIX_FMT_D3D11,
+      AV_HWDEVICE_TYPE_D3D11VA },
 #endif
 #if HAVE_DXVA2_LIB
     { "dxva2", dxva2_init, HWACCEL_DXVA2, AV_PIX_FMT_DXVA2_VLD,
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index e4f6a0d727..628b879521 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -740,16 +740,16 @@ static int setup_hwaccel(AVCodecContext *avctx,
             return AVERROR(ENOMEM);
     }
 
+    avctx->hwaccel = hwa;
     if (hwa->init) {
         ret = hwa->init(avctx);
         if (ret < 0) {
             av_freep(&avctx->internal->hwaccel_priv_data);
+            avctx->hwaccel = NULL;
             return ret;
         }
     }
 
-    avctx->hwaccel = hwa;
-
     return 0;
 }
 
diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index b0452b6a9a..2cb23189e1 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -22,15 +22,620 @@
 
 #include <assert.h>
 #include <string.h>
+#include <initguid.h>
 
+#include "libavutil/common.h"
 #include "libavutil/log.h"
 #include "libavutil/time.h"
 
 #include "avcodec.h"
 #include "dxva2_internal.h"
 
-static void *get_surface(const AVFrame *frame)
+/* Define all the GUIDs used directly here,
+ to avoid problems with inconsistent dxva2api.h versions in mingw-w64 and different MSVC versions. */
+DEFINE_GUID(ff_DXVA2_ModeMPEG2_VLD,      0xee27417f, 
0x5e28,0x4e65,0xbe,0xea,0x1d,0x26,0xb5,0x08,0xad,0xc9);
+DEFINE_GUID(ff_DXVA2_ModeMPEG2and1_VLD,  0x86695f12, 
0x340e,0x4f04,0x9f,0xd3,0x92,0x53,0xdd,0x32,0x74,0x60);
+DEFINE_GUID(ff_DXVA2_ModeH264_E,         0x1b81be68, 
0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
+DEFINE_GUID(ff_DXVA2_ModeH264_F,         0x1b81be69, 
0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
+DEFINE_GUID(ff_DXVADDI_Intel_ModeH264_E, 0x604F8E68, 
0x4951,0x4C54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
+DEFINE_GUID(ff_DXVA2_ModeVC1_D,          0x1b81beA3, 
0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
+DEFINE_GUID(ff_DXVA2_ModeVC1_D2010,      0x1b81beA4, 
0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
+DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main,  0x5b11d51b, 
0x2f4c,0x4452,0xbc,0xc3,0x09,0xf2,0xa1,0x16,0x0c,0xc0);
+DEFINE_GUID(ff_DXVA2_ModeHEVC_VLD_Main10,0x107af0e0, 
0xef1a,0x4d19,0xab,0xa8,0x67,0xa1,0x63,0x07,0x3d,0x13);
+DEFINE_GUID(ff_DXVA2_NoEncrypt,          0x1b81beD0, 
0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
+DEFINE_GUID(ff_GUID_NULL,                0x00000000, 
0x0000,0x0000,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00);
+DEFINE_GUID(ff_IID_IDirectXVideoDecoderService, 
0xfc51a551,0xd5e7,0x11d9,0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02);
+
+typedef struct dxva_mode {
+    const GUID     *guid;
+    enum AVCodecID codec;
+} dxva_mode;
+
+static const dxva_mode dxva_modes[] = {
+    /* MPEG-2 */
+    { &ff_DXVA2_ModeMPEG2_VLD,       AV_CODEC_ID_MPEG2VIDEO },
+    { &ff_DXVA2_ModeMPEG2and1_VLD,   AV_CODEC_ID_MPEG2VIDEO },
+
+    /* H.264 */
+    { &ff_DXVA2_ModeH264_F,          AV_CODEC_ID_H264 },
+    { &ff_DXVA2_ModeH264_E,          AV_CODEC_ID_H264 },
+    /* Intel specific H.264 mode */
+    { &ff_DXVADDI_Intel_ModeH264_E,  AV_CODEC_ID_H264 },
+
+    /* VC-1 / WMV3 */
+    { &ff_DXVA2_ModeVC1_D2010,       AV_CODEC_ID_VC1 },
+    { &ff_DXVA2_ModeVC1_D2010,       AV_CODEC_ID_WMV3 },
+    { &ff_DXVA2_ModeVC1_D,           AV_CODEC_ID_VC1 },
+    { &ff_DXVA2_ModeVC1_D,           AV_CODEC_ID_WMV3 },
+
+    /* HEVC/H.265 */
+    { &ff_DXVA2_ModeHEVC_VLD_Main,   AV_CODEC_ID_HEVC },
+    { &ff_DXVA2_ModeHEVC_VLD_Main10, AV_CODEC_ID_HEVC },
+
+    { NULL,                      0 },
+};
+
+/* Return the index of the highest-scoring entry of cfg_list, or AVERROR(EINVAL) when no entry scores. */
+static int dxva_get_decoder_configuration(AVCodecContext *avctx,
+                                          const void *cfg_list, unsigned cfg_count)
+{
+    unsigned i, best_score = 0;
+    int best_cfg = -1;
+
+    for (i = 0; i < cfg_count; i++) {
+        unsigned score;
+        UINT ConfigBitstreamRaw = 0; /* 0 never scores, so an entry is skipped if pix_fmt matches no branch below */
+        GUID guidConfigBitstreamEncryption = ff_GUID_NULL;
+
+#if CONFIG_D3D11VA
+        if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
+            D3D11_VIDEO_DECODER_CONFIG *cfg = &((D3D11_VIDEO_DECODER_CONFIG *)cfg_list)[i];
+            ConfigBitstreamRaw = cfg->ConfigBitstreamRaw;
+            guidConfigBitstreamEncryption = cfg->guidConfigBitstreamEncryption;
+        }
+#endif
+#if CONFIG_DXVA2
+        if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
+            DXVA2_ConfigPictureDecode *cfg = &((DXVA2_ConfigPictureDecode *)cfg_list)[i];
+            ConfigBitstreamRaw = cfg->ConfigBitstreamRaw;
+            guidConfigBitstreamEncryption = cfg->guidConfigBitstreamEncryption;
+        }
+#endif
+        /* Score: ConfigBitstreamRaw 1 -> 1, 2 (H.264 only) -> 2, anything else is skipped; +16 if the GUID is ff_DXVA2_NoEncrypt. */
+        if (ConfigBitstreamRaw == 1)
+            score = 1;
+        else if (avctx->codec_id == AV_CODEC_ID_H264 && ConfigBitstreamRaw == 2)
+            score = 2;
+        else
+            continue;
+        if (IsEqualGUID(&guidConfigBitstreamEncryption, &ff_DXVA2_NoEncrypt))
+            score += 16;
+        if (score > best_score) {
+            best_score = score;
+            best_cfg = i;
+        }
+    }
+
+    if (!best_score) {
+        av_log(avctx, AV_LOG_VERBOSE, "No valid decoder configuration available\n");
+        return AVERROR(EINVAL);
+    }
+
+    return best_cfg;
+}
+
+#if CONFIG_D3D11VA
+static int d3d11va_validate_output(void *service, GUID guid, void 
*surface_format)
+{
+    HRESULT hr;
+    BOOL is_supported = FALSE;
+    hr = ID3D11VideoDevice_CheckVideoDecoderFormat((ID3D11VideoDevice 
*)service,
+                                                   &guid,
+                                                   *(DXGI_FORMAT 
*)surface_format,
+                                                   &is_supported);
+    return SUCCEEDED(hr) && is_supported;
+}
+#endif
+
+#if CONFIG_DXVA2
+static int dxva2_validate_output(void *decoder_service, GUID guid, void 
*surface_format)
+{
+    HRESULT hr;
+    int ret = 0;
+    unsigned j, target_count;
+    D3DFORMAT *target_list;
+    hr = 
IDirectXVideoDecoderService_GetDecoderRenderTargets((IDirectXVideoDecoderService
 *)decoder_service, &guid, &target_count, &target_list);
+    if (SUCCEEDED(hr)) {
+        for (j = 0; j < target_count; j++) {
+            const D3DFORMAT format = target_list[j];
+            if (format == *(D3DFORMAT *)surface_format) {
+                ret = 1;
+                break;
+            }
+        }
+        CoTaskMemFree(target_list);
+    }
+    return ret;
+}
+#endif
+
+static int dxva_get_decoder_guid(AVCodecContext *avctx, void *service, void *surface_format,
+                                 unsigned guid_count, const GUID *guid_list, GUID *decoder_guid)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    unsigned i, j;
+    /* Select the first dxva_modes entry that matches the codec, is listed in guid_list and validates; returns 0 or AVERROR(EINVAL). */
+    *decoder_guid = ff_GUID_NULL;
+    for (i = 0; dxva_modes[i].guid; i++) {
+        const dxva_mode *mode = &dxva_modes[i];
+        int validate = 0; /* a mode stays rejected when pix_fmt selects neither validator below */
+        if (mode->codec != avctx->codec_id)
+            continue;
+        /* skip modes that are absent from the caller-supplied guid_list */
+        for (j = 0; j < guid_count; j++) {
+            if (IsEqualGUID(mode->guid, &guid_list[j]))
+                break;
+        }
+        if (j == guid_count)
+            continue;
+
+#if CONFIG_D3D11VA
+        if (avctx->pix_fmt == AV_PIX_FMT_D3D11)
+            validate = d3d11va_validate_output(service, *mode->guid, surface_format);
+#endif
+#if CONFIG_DXVA2
+        if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
+            validate = dxva2_validate_output(service, *mode->guid, surface_format);
+#endif
+        if (validate) {
+            *decoder_guid = *mode->guid;
+            break;
+        }
+    }
+
+    if (IsEqualGUID(decoder_guid, &ff_GUID_NULL)) {
+        av_log(avctx, AV_LOG_VERBOSE, "No decoder device for codec found\n");
+        return AVERROR(EINVAL);
+    }
+    /* picking the Intel-specific H.264 GUID sets the matching workaround flag on the shared context */
+    if (IsEqualGUID(decoder_guid, &ff_DXVADDI_Intel_ModeH264_E))
+        sctx->workaround |= FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO;
+
+    return 0;
+}
+
+static void bufref_free_interface(void *opaque, uint8_t *data)
+{
+    IUnknown_Release((IUnknown *)opaque);
+}
+
+static AVBufferRef *bufref_wrap_interface(IUnknown *iface)
+{
+    return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 
0);
+}
+
+#if CONFIG_DXVA2
+
+static int dxva2_get_decoder_configuration(AVCodecContext *avctx, const GUID 
*device_guid,
+                                           const DXVA2_VideoDesc *desc,
+                                           DXVA2_ConfigPictureDecode *config)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    unsigned cfg_count;
+    DXVA2_ConfigPictureDecode *cfg_list;
+    HRESULT hr;
+    int ret;
+
+    hr = 
IDirectXVideoDecoderService_GetDecoderConfigurations(sctx->dxva2_service, 
device_guid, desc, NULL, &cfg_count, &cfg_list);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to retrieve decoder 
configurations\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = dxva_get_decoder_configuration(avctx, cfg_list, cfg_count);
+    if (ret >= 0)
+        *config = cfg_list[ret];
+    CoTaskMemFree(cfg_list);
+    return ret;
+}
+
+static int dxva2_create_decoder(AVCodecContext *avctx)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    GUID *guid_list;
+    unsigned guid_count;
+    GUID device_guid;
+    D3DFORMAT surface_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ?
+                               MKTAG('P', '0', '1', '0') : MKTAG('N', 'V', 
'1', '2');
+    DXVA2_VideoDesc desc = { 0 };
+    DXVA2_ConfigPictureDecode config;
+    HRESULT hr;
+    int ret;
+    HANDLE device_handle;
+    AVHWFramesContext *frames_ctx = 
(AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    AVDXVA2FramesContext *frames_hwctx = frames_ctx->hwctx;
+    AVDXVA2DeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+
+    hr = IDirect3DDeviceManager9_OpenDeviceHandle(device_hwctx->devmgr,
+                                                  &device_handle);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open a device handle\n");
+        goto fail;
+    }
+
+    hr = IDirect3DDeviceManager9_GetVideoService(device_hwctx->devmgr, 
device_handle,
+                                                 
&ff_IID_IDirectXVideoDecoderService,
+                                                 (void 
**)&sctx->dxva2_service);
+    IDirect3DDeviceManager9_CloseDeviceHandle(device_hwctx->devmgr, 
device_handle);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create 
IDirectXVideoDecoderService\n");
+        goto fail;
+    }
+
+    hr = 
IDirectXVideoDecoderService_GetDecoderDeviceGuids(sctx->dxva2_service, 
&guid_count, &guid_list);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to retrieve decoder device 
GUIDs\n");
+        goto fail;
+    }
+
+    ret = dxva_get_decoder_guid(avctx, sctx->dxva2_service, &surface_format,
+                                guid_count, guid_list, &device_guid);
+    CoTaskMemFree(guid_list);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    desc.SampleWidth  = avctx->coded_width;
+    desc.SampleHeight = avctx->coded_height;
+    desc.Format       = surface_format;
+
+    ret = dxva2_get_decoder_configuration(avctx, &device_guid, &desc, &config);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    hr = IDirectXVideoDecoderService_CreateVideoDecoder(sctx->dxva2_service, 
&device_guid,
+                                                        &desc, &config, 
frames_hwctx->surfaces,
+                                                        
frames_hwctx->nb_surfaces, &sctx->dxva2_decoder);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create DXVA2 video decoder\n");
+        goto fail;
+    }
+
+    sctx->dxva2_config = config;
+
+    sctx->decoder_ref = bufref_wrap_interface((IUnknown *)sctx->dxva2_decoder);
+    if (!sctx->decoder_ref)
+        return AVERROR(ENOMEM);
+
+    return 0;
+fail:
+    return AVERROR(EINVAL);
+}
+
+#endif
+
+#if CONFIG_D3D11VA
+
+/* Fetch all decoder configurations for desc and copy the best one into *config; returns its index or a negative AVERROR. */
+static int d3d11va_get_decoder_configuration(AVCodecContext *avctx,
+        const D3D11_VIDEO_DECODER_DESC *desc, D3D11_VIDEO_DECODER_CONFIG *config)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    unsigned cfg_count = 0;
+    D3D11_VIDEO_DECODER_CONFIG *cfg_list = NULL;
+    HRESULT hr;
+    int i, ret;
+
+    hr = ID3D11VideoDevice_GetVideoDecoderConfigCount(sctx->d3d11_video_device, desc, &cfg_count);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to retrieve decoder configurations\n");
+        return AVERROR(EINVAL);
+    }
+
+    cfg_list = av_malloc(cfg_count * sizeof(D3D11_VIDEO_DECODER_CONFIG));
+    if (cfg_list == NULL)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < cfg_count; i++) {
+        hr = ID3D11VideoDevice_GetVideoDecoderConfig(sctx->d3d11_video_device, desc, i, &cfg_list[i]);
+        if (FAILED(hr)) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to retrieve decoder configurations. (hr=0x%lX)\n", hr);
+            av_free(cfg_list); /* allocated with av_malloc(), so it must not be handed to free() */
+            return AVERROR(EINVAL);
+        }
+    }
+
+    ret = dxva_get_decoder_configuration(avctx, cfg_list, cfg_count);
+    if (ret >= 0)
+        *config = cfg_list[ret];
+    av_free(cfg_list);
+    return ret;
+}
+
+static int d3d11va_create_decoder(AVCodecContext *avctx)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    GUID *guid_list;
+    unsigned guid_count, i;
+    GUID decoder_guid;
+    DXGI_FORMAT surface_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ?
+                                 DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
+    D3D11_VIDEO_DECODER_DESC desc = { 0 };
+    D3D11_VIDEO_DECODER_CONFIG config;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext 
*)avctx->hw_frames_ctx->data;
+    AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx;
+    D3D11_TEXTURE2D_DESC texdesc;
+    HRESULT hr;
+    int ret;
+
+    if (!frames_hwctx->texture) {
+        av_log(avctx, AV_LOG_ERROR, "AVD3D11VAFramesContext.texture not 
set.\n");
+        return AVERROR(EINVAL);
+    }
+    ID3D11Texture2D_GetDesc(frames_hwctx->texture, &texdesc);
+
+    guid_count = 
ID3D11VideoDevice_GetVideoDecoderProfileCount(sctx->d3d11_video_device);
+    guid_list = av_malloc(sizeof(*guid_list) * guid_count);
+    if (guid_list==NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get the decoder GUIDs\n");
+        return AVERROR(EINVAL);
+    }
+    for (i=0; i<guid_count; i++) {
+        hr = 
ID3D11VideoDevice_GetVideoDecoderProfile(sctx->d3d11_video_device, i, 
&guid_list[i]);
+        if (FAILED(hr)) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to retrieve decoder GUID 
%d\n", i);
+            av_free(guid_list);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    ret = dxva_get_decoder_guid(avctx, sctx->d3d11_video_device, 
&surface_format,
+                                guid_count, guid_list, &decoder_guid);
+    av_free(guid_list);
+    if (ret < 0)
+        return AVERROR(EINVAL);
+
+    desc.SampleWidth  = avctx->coded_width;
+    desc.SampleHeight = avctx->coded_height;
+    desc.OutputFormat = surface_format;
+    desc.Guid         = decoder_guid;
+
+    ret = d3d11va_get_decoder_configuration(avctx, &desc, &config);
+    if (ret < 0)
+        return AVERROR(EINVAL);
+
+
+    sctx->d3d11_views = av_mallocz_array(texdesc.ArraySize, 
sizeof(sctx->d3d11_views[0]));
+    if (!sctx->d3d11_views)
+        return AVERROR(ENOMEM);
+    sctx->nb_d3d11_views = texdesc.ArraySize;
+
+    for (i = 0; i < sctx->nb_d3d11_views; i++) {
+        D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC viewDesc = {
+            .DecodeProfile = decoder_guid,
+            .ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D,
+            .Texture2D = {
+                .ArraySlice = i,
+            }
+        };
+        hr = 
ID3D11VideoDevice_CreateVideoDecoderOutputView(sctx->d3d11_video_device,
+                                                            (ID3D11Resource*) 
frames_hwctx->texture,
+                                                            &viewDesc,
+                                                            
(ID3D11VideoDecoderOutputView**) &sctx->d3d11_views[i]);
+        if (FAILED(hr)) {
+            av_log(avctx, AV_LOG_ERROR, "Could not create the decoder output 
view %d\n", i);
+            return AVERROR_UNKNOWN;
+        }
+    }
+
+    hr = ID3D11VideoDevice_CreateVideoDecoder(sctx->d3d11_video_device, &desc,
+                                              &config, &sctx->d3d11_decoder);
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create D3D11VA video 
decoder\n");
+        return AVERROR(EINVAL);
+    }
+
+    sctx->d3d11_config = config;
+    sctx->d3d11_texture = frames_hwctx->texture;
+
+    sctx->decoder_ref = bufref_wrap_interface((IUnknown *)sctx->d3d11_decoder);
+    if (!sctx->decoder_ref)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+#endif
+
+// This must work before the decoder is created.
+// This somehow needs to be exported to the user.
+static void dxva_adjust_hwframes(AVCodecContext *avctx, AVHWFramesContext 
*frames_ctx)
+{
+    int surface_alignment, num_surfaces;
+
+    frames_ctx->format = avctx->pix_fmt;
+
+    /* decoding MPEG-2 requires additional alignment on some Intel GPUs,
+    but it causes issues for H.264 on certain AMD GPUs..... */
+    if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO)
+        surface_alignment = 32;
+    /* the HEVC DXVA2 spec asks for 128 pixel aligned surfaces to ensure
+    all coding features have enough room to work with */
+    else if (avctx->codec_id == AV_CODEC_ID_HEVC)
+        surface_alignment = 128;
+    else
+        surface_alignment = 16;
+
+    /* 4 base work surfaces */
+    num_surfaces = 4;
+
+    /* add surfaces based on number of possible refs */
+    if (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == 
AV_CODEC_ID_HEVC)
+        num_surfaces += 16;
+    else
+        num_surfaces += 2;
+
+    /* add extra surfaces for frame threading */
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        num_surfaces += avctx->thread_count;
+
+    frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ?
+                            AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
+    frames_ctx->width = FFALIGN(avctx->coded_width, surface_alignment);
+    frames_ctx->height = FFALIGN(avctx->coded_height, surface_alignment);
+    frames_ctx->initial_pool_size = num_surfaces;
+
+    if (frames_ctx->format == AV_PIX_FMT_DXVA2_VLD) {
+        AVDXVA2FramesContext *frames_hwctx = frames_ctx->hwctx;
+
+        frames_hwctx->surface_type = DXVA2_VideoDecoderRenderTarget;
+    }
+}
+
+int ff_dxva2_decode_init(AVCodecContext *avctx)
 {
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    AVHWFramesContext *frames_ctx = NULL;
+    int ret = 0;
+
+    // FIXME: it's not set yet at this point, so here's a shitty hack
+    avctx->pix_fmt = avctx->hwaccel->pix_fmt;
+
+    // Old API.
+    if (avctx->hwaccel_context)
+        return 0;
+
+    if (avctx->codec_id == AV_CODEC_ID_H264 &&
+        (avctx->profile & ~FF_PROFILE_H264_CONSTRAINED) > 
FF_PROFILE_H264_HIGH) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unsupported H.264 profile for DXVA 
HWAccel: %d\n",avctx->profile);
+        return AVERROR(ENOTSUP);
+    }
+
+    if (avctx->codec_id == AV_CODEC_ID_HEVC &&
+        avctx->profile != FF_PROFILE_HEVC_MAIN && avctx->profile != 
FF_PROFILE_HEVC_MAIN_10) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unsupported HEVC profile for DXVA 
HWAccel: %d\n", avctx->profile);
+        return AVERROR(ENOTSUP);
+    }
+
+    if (!avctx->hw_frames_ctx && !avctx->hw_device_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Either a hw_frames_ctx or a hw_device_ctx 
needs to be set for hardware decoding.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->hw_frames_ctx) {
+        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    } else {
+        avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
+        if (!avctx->hw_frames_ctx)
+            return AVERROR(ENOMEM);
+
+        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+
+        dxva_adjust_hwframes(avctx, frames_ctx);
+
+        ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
+        if (ret < 0) {
+            av_buffer_unref(&avctx->hw_frames_ctx);
+            return ret;
+        }
+    }
+
+    if (frames_ctx->format != avctx->pix_fmt ||
+        !((avctx->pix_fmt == AV_PIX_FMT_D3D11 && CONFIG_D3D11VA) ||
+          (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD && CONFIG_DXVA2))) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n");
+        av_buffer_unref(&avctx->hw_frames_ctx);
+        return AVERROR(EINVAL);
+    }
+
+#if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
+        AVD3D11VADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+        AVD3D11VAContext *d3d11_ctx = &sctx->ctx.d3d11va;
+
+        sctx->d3d11_video_device = device_hwctx->video_device;
+
+        ret = d3d11va_create_decoder(avctx);
+        if (ret < 0) {
+            ff_dxva2_decode_uninit(avctx);
+            return ret;
+        }
+
+        d3d11_ctx->decoder       = sctx->d3d11_decoder;
+        d3d11_ctx->video_context = device_hwctx->video_context;
+        d3d11_ctx->cfg           = &sctx->d3d11_config;
+        d3d11_ctx->surface_count = sctx->nb_d3d11_views;
+        d3d11_ctx->surface       = sctx->d3d11_views;
+        d3d11_ctx->workaround    = sctx->workaround;
+        d3d11_ctx->context_mutex = device_hwctx->dev_ctx_mutex;
+    }
+#endif
+
+#if CONFIG_DXVA2
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
+        AVDXVA2FramesContext *frames_hwctx = frames_ctx->hwctx;
+        struct dxva_context *dxva_ctx = &sctx->ctx.dxva2;
+
+        ret = dxva2_create_decoder(avctx);
+        if (ret < 0) {
+            ff_dxva2_decode_uninit(avctx);
+            return ret;
+        }
+
+        dxva_ctx->decoder       = sctx->dxva2_decoder;
+        dxva_ctx->cfg           = &sctx->dxva2_config;
+        dxva_ctx->surface       = frames_hwctx->surfaces;
+        dxva_ctx->surface_count = frames_hwctx->nb_surfaces;
+        dxva_ctx->workaround    = sctx->workaround;
+    }
+#endif
+
+    return 0;
+}
+
+int ff_dxva2_decode_uninit(AVCodecContext *avctx)
+{
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+    int i, release_decoder = !sctx->decoder_ref; /* no wrapper was created: the decoder reference is still ours */
+    /* Tear-down: decoder_ref wraps the decoder without AddRef and its free callback calls Release(), so do not release it twice. */
+    av_buffer_unref(&sctx->decoder_ref);
+
+#if CONFIG_D3D11VA
+    if (release_decoder && sctx->d3d11_decoder)
+        ID3D11VideoDecoder_Release(sctx->d3d11_decoder);
+    for (i = 0; i < sctx->nb_d3d11_views; i++) {
+        if (sctx->d3d11_views[i])
+            ID3D11VideoDecoderOutputView_Release(sctx->d3d11_views[i]);
+    }
+    av_free(sctx->d3d11_views);
+#endif
+
+#if CONFIG_DXVA2
+    if (release_decoder && sctx->dxva2_decoder)
+        IDirectXVideoDecoder_Release(sctx->dxva2_decoder);
+    if (sctx->dxva2_service)
+        IDirectXVideoDecoderService_Release(sctx->dxva2_service);
+#endif
+
+    av_buffer_unref(&avctx->hw_frames_ctx);
+    return 0;
+}
+
+/* Return the decoder surface for frame: the D3D11 output view indexed by data[1], otherwise data[3]; NULL if a D3D11 frame is invalid. */
+static void *get_surface(AVCodecContext *avctx, const AVFrame *frame)
+{
+#if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
+        FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+        intptr_t index = (intptr_t)frame->data[1];
+        if (index < 0 || index >= sctx->nb_d3d11_views || sctx->d3d11_texture != (ID3D11Texture2D *)frame->data[0]) {
+            av_log(avctx, AV_LOG_ERROR, "get_buffer frame is invalid! %d %d %p %p\n", /* args cast to match %d / %p */
+                   (int)index, (int)sctx->nb_d3d11_views, (void *)sctx->d3d11_texture, (void *)frame->data[0]);
+            return NULL;
+        }
+        return sctx->d3d11_views[index];
+    }
+#endif
     return frame->data[3];
 }
 
@@ -38,10 +643,12 @@ unsigned ff_dxva2_get_surface_index(const AVCodecContext 
*avctx,
                                     const AVDXVAContext *ctx,
                                     const AVFrame *frame)
 {
-    void *surface = get_surface(frame);
+    void *surface = get_surface(avctx, frame);
     unsigned i;
 
 #if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11)
+        return (intptr_t)frame->data[1];
     if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
         D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC viewDesc;
         ID3D11VideoDecoderOutputView_GetDesc((ID3D11VideoDecoderOutputView*) 
surface, &viewDesc);
@@ -71,7 +678,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
     HRESULT hr;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         hr = 
ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                  D3D11VA_CONTEXT(ctx)->decoder,
                                                  type,
@@ -91,7 +698,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
         memcpy(dxva_data, data, size);
 
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+        if (ff_dxva2_is_d3d11(avctx)) {
             D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = dsc;
             memset(dsc11, 0, sizeof(*dsc11));
             dsc11->BufferType           = type;
@@ -116,7 +723,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
     }
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         hr = 
ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder, type);
 #endif
 #if CONFIG_DXVA2
@@ -139,7 +746,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
                                                   DECODER_BUFFER_DESC *bs,
                                                   DECODER_BUFFER_DESC *slice))
 {
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     unsigned               buffer_count = 0;
 #if CONFIG_D3D11VA
     D3D11_VIDEO_DECODER_BUFFER_DESC buffer11[4];
@@ -151,27 +758,38 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
     int result, runs = 0;
     HRESULT hr;
     unsigned type;
+    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
+
+    // FIXME: should probably be in start_frame
+    if (sctx->decoder_ref) {
+        // FIXME: append it to the buf/extended_buf array properly
+        if (frame->buf[1])
+            av_log(avctx, AV_LOG_ERROR, "AVFrame.buf[1]!=NULL unexpected\n");
+        frame->buf[1] = av_buffer_ref(sctx->decoder_ref);
+        if (!frame->buf[1])
+            return AVERROR(ENOMEM);
+    }
 
     do {
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+        if (ff_dxva2_is_d3d11(avctx)) {
             if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
                 WaitForSingleObjectEx(D3D11VA_CONTEXT(ctx)->context_mutex, 
INFINITE, FALSE);
             hr = 
ID3D11VideoContext_DecoderBeginFrame(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder,
-                                                      get_surface(frame),
+                                                      get_surface(avctx, 
frame),
                                                       0, NULL);
         }
 #endif
 #if CONFIG_DXVA2
         if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
             hr = IDirectXVideoDecoder_BeginFrame(DXVA2_CONTEXT(ctx)->decoder,
-                                                 get_surface(frame),
+                                                 get_surface(avctx, frame),
                                                  NULL);
 #endif
         if (hr != E_PENDING || ++runs > 50)
             break;
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+        if (ff_dxva2_is_d3d11(avctx))
             if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
                 ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
 #endif
@@ -181,7 +799,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
     if (FAILED(hr)) {
         av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", hr);
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+        if (ff_dxva2_is_d3d11(avctx))
             if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
                 ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
 #endif
@@ -189,7 +807,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
     }
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         buffer = &buffer11[buffer_count];
         type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
     }
@@ -212,7 +830,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
     if (qm_size > 0) {
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+        if (ff_dxva2_is_d3d11(avctx)) {
             buffer = &buffer11[buffer_count];
             type = D3D11_VIDEO_DECODER_BUFFER_INVERSE_QUANTIZATION_MATRIX;
         }
@@ -235,7 +853,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
     }
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         buffer       = &buffer11[buffer_count + 0];
         buffer_slice = &buffer11[buffer_count + 1];
     }
@@ -262,7 +880,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
     assert(buffer_count == 1 + (qm_size > 0) + 2);
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         hr = 
ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context,
                                                      
D3D11VA_CONTEXT(ctx)->decoder,
                                                      buffer_count, buffer11);
@@ -284,7 +902,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
 end:
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         hr = 
ID3D11VideoContext_DecoderEndFrame(D3D11VA_CONTEXT(ctx)->video_context, 
D3D11VA_CONTEXT(ctx)->decoder);
         if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
             ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
@@ -299,5 +917,17 @@ end:
         result = -1;
     }
 
+    av_log(avctx, AV_LOG_WARNING, "end frame result=%d\n", result); // FIXME 
remove
+
     return result;
 }
+
+/**
+ * Tell whether the codec context is decoding into one of the D3D11 pixel
+ * formats (AV_PIX_FMT_D3D11VA_VLD or AV_PIX_FMT_D3D11).
+ *
+ * @return 1 for a D3D11 pixel format, 0 otherwise; always 0 when built
+ *         without CONFIG_D3D11VA
+ */
+int ff_dxva2_is_d3d11(AVCodecContext *avctx)
+{
+#if CONFIG_D3D11VA
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_D3D11VA_VLD:
+    case AV_PIX_FMT_D3D11:
+        return 1;
+    default:
+        return 0;
+    }
+#else
+    return 0;
+#endif
+}
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 84959c532d..59e488d68d 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -220,7 +220,7 @@ static void fill_slice_long(AVCodecContext *avctx, 
DXVA_Slice_H264_Long *slice,
 {
     const H264Context *h = avctx->priv_data;
     H264SliceContext *sl = &h->slice_ctx[0];
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     unsigned list;
 
     memset(slice, 0, sizeof(*slice));
@@ -302,7 +302,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
 {
     const H264Context *h = avctx->priv_data;
     const unsigned mb_count = h->mb_width * h->mb_height;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     const H264Picture *current_picture = h->cur_pic_ptr;
     struct dxva2_picture_context *ctx_pic = 
current_picture->hwaccel_picture_private;
     DXVA_Slice_H264_Short *slice = NULL;
@@ -317,7 +317,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
 
     /* Create an annex B bitstream buffer with only slice NAL and finalize 
slice */
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
         if 
(FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                        
D3D11VA_CONTEXT(ctx)->decoder,
@@ -388,7 +388,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         slice->SliceBytesInBuffer += padding;
     }
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         if 
(FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
 D3D11VA_CONTEXT(ctx)->decoder, type)))
             return -1;
 #endif
@@ -401,7 +401,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         return -1;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
         memset(dsc11, 0, sizeof(*dsc11));
         dsc11->BufferType           = type;
@@ -442,7 +442,7 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx,
                                   av_unused uint32_t size)
 {
     const H264Context *h = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     struct dxva2_picture_context *ctx_pic = 
h->cur_pic_ptr->hwaccel_picture_private;
 
     if (!DXVA_CONTEXT_VALID(avctx, ctx))
@@ -467,7 +467,7 @@ static int dxva2_h264_decode_slice(AVCodecContext *avctx,
 {
     const H264Context *h = avctx->priv_data;
     const H264SliceContext *sl = &h->slice_ctx[0];
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     const H264Picture *current_picture = h->cur_pic_ptr;
     struct dxva2_picture_context *ctx_pic = 
current_picture->hwaccel_picture_private;
     unsigned position;
@@ -518,10 +518,13 @@ AVHWAccel ff_h264_dxva2_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_h264_start_frame,
     .decode_slice   = dxva2_h264_decode_slice,
     .end_frame      = dxva2_h264_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -530,10 +533,14 @@ AVHWAccel ff_h264_d3d11va_hwaccel = {
     .name           = "h264_d3d11va",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    // FIXME: to keep compatibility, we probably need 2 d3d11 hwaccels for 
each codec
+    .pix_fmt        = AV_PIX_FMT_D3D11,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_h264_start_frame,
     .decode_slice   = dxva2_h264_decode_slice,
     .end_frame      = dxva2_h264_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c
index 17548d25bb..5cc8a56d5d 100644
--- a/libavcodec/dxva2_hevc.c
+++ b/libavcodec/dxva2_hevc.c
@@ -243,7 +243,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
                                              DECODER_BUFFER_DESC *sc)
 {
     const HEVCContext *h = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     const HEVCFrame *current_picture = h->ref;
     struct hevc_dxva2_picture_context *ctx_pic = 
current_picture->hwaccel_picture_private;
     DXVA_Slice_HEVC_Short *slice = NULL;
@@ -258,7 +258,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
 
     /* Create an annex B bitstream buffer with only slice NAL and finalize 
slice */
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
         if 
(FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                        
D3D11VA_CONTEXT(ctx)->decoder,
@@ -312,7 +312,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         slice->SliceBytesInBuffer += padding;
     }
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         if 
(FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
 D3D11VA_CONTEXT(ctx)->decoder, type)))
             return -1;
 #endif
@@ -325,7 +325,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         return -1;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
         memset(dsc11, 0, sizeof(*dsc11));
         dsc11->BufferType           = type;
@@ -362,7 +362,7 @@ static int dxva2_hevc_start_frame(AVCodecContext *avctx,
                                   av_unused uint32_t size)
 {
     const HEVCContext *h = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     struct hevc_dxva2_picture_context *ctx_pic = 
h->ref->hwaccel_picture_private;
 
     if (!DXVA_CONTEXT_VALID(avctx, ctx))
@@ -427,10 +427,13 @@ AVHWAccel ff_hevc_dxva2_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_hevc_start_frame,
     .decode_slice   = dxva2_hevc_decode_slice,
     .end_frame      = dxva2_hevc_end_frame,
     .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -440,9 +443,12 @@ AVHWAccel ff_hevc_d3d11va_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_HEVC,
     .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_hevc_start_frame,
     .decode_slice   = dxva2_hevc_decode_slice,
     .end_frame      = dxva2_hevc_end_frame,
     .frame_priv_data_size = sizeof(struct hevc_dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index 8b774b14f3..9dc5a54576 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h
@@ -32,9 +32,11 @@
 
 #if CONFIG_DXVA2
 #include "dxva2.h"
+#include "libavutil/hwcontext_dxva2.h"
 #endif
 #if CONFIG_D3D11VA
 #include "d3d11va.h"
+#include "libavutil/hwcontext_d3d11va.h"
 #endif
 #if HAVE_DXVA_H
 /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h
@@ -46,7 +48,10 @@
 #include <dxva.h>
 #endif
 
+#include "libavutil/hwcontext.h"
+
 #include "avcodec.h"
+#include "internal.h"
 
 typedef void DECODER_BUFFER_DESC;
 
@@ -59,21 +64,52 @@ typedef union {
 #endif
 } AVDXVAContext;
 
+/**
+ * State of the generic DXVA2/D3D11VA hwaccel setup. The hwaccels allocate it
+ * as their private data (priv_data_size) and reach it through
+ * DXVA_SHARED_CONTEXT().
+ */
+typedef struct FFDXVASharedContext {
+    // A new reference to this is stored in AVFrame.buf[1] of every frame by
+    // ff_dxva2_common_end_frame(); the context's own reference is dropped in
+    // ff_dxva2_decode_uninit().
+    AVBufferRef *decoder_ref;
+
+    // FF_DXVA2_WORKAROUND_* flags
+    // (copied into the legacy context's workaround field during init)
+    uint64_t workaround;
+
+#if CONFIG_D3D11VA
+    // Released in ff_dxva2_decode_uninit().
+    ID3D11VideoDecoder             *d3d11_decoder;
+    // NOTE(review): not referenced in the visible part of the patch;
+    // presumably the device the decoder is created from - confirm.
+    ID3D11VideoDevice              *d3d11_video_device;
+    // NOTE(review): presumably the configuration the decoder was created
+    // with - confirm against the setup code.
+    D3D11_VIDEO_DECODER_CONFIG      d3d11_config;
+    // Array of nb_d3d11_views output views, indexed by the value carried in
+    // AVFrame.data[1]; each view is released and the array freed at uninit.
+    ID3D11VideoDecoderOutputView  **d3d11_views;
+    int                          nb_d3d11_views;
+    // get_surface() rejects frames whose data[0] is not this texture.
+    ID3D11Texture2D                *d3d11_texture;
+#endif
+
+#if CONFIG_DXVA2
+    // Exported as the legacy dxva_context.decoder; released at uninit.
+    IDirectXVideoDecoder           *dxva2_decoder;
+    // Released at uninit.
+    IDirectXVideoDecoderService    *dxva2_service;
+    // The legacy dxva_context.cfg pointer is set to the address of this.
+    DXVA2_ConfigPictureDecode       dxva2_config;
+#endif
+
+    // Legacy (but used by code outside of setup)
+    // In generic mode, DXVA_CONTEXT() will return a pointer to this.
+    AVDXVAContext ctx;
+} FFDXVASharedContext;
+
+// The hwaccel private data of a codec context, viewed as FFDXVASharedContext.
+#define DXVA_SHARED_CONTEXT(avctx) ((FFDXVASharedContext*)((avctx)->internal->hwaccel_priv_data))
+
+// The AVDXVAContext to decode with: the user-supplied hwaccel_context when it
+// is set, otherwise the ctx member embedded in the shared context.
+// The whole expansion is parenthesized so that DXVA_CONTEXT(avctx)->member
+// applies "->" to the cast result rather than to the uncast operand.
+#define DXVA_CONTEXT(avctx) ((AVDXVAContext*)((avctx)->hwaccel_context ? (avctx)->hwaccel_context : (&(DXVA_SHARED_CONTEXT(avctx)->ctx))))
+
+
 #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va)
 #define DXVA2_CONTEXT(ctx)   (&ctx->dxva2)
 
 #if CONFIG_D3D11VA && CONFIG_DXVA2
-#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.workaround : ctx->dxva2.workaround)
-#define DXVA_CONTEXT_COUNT(avctx, ctx)          (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
-#define DXVA_CONTEXT_DECODER(avctx, ctx)        (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.decoder : ctx->dxva2.decoder)
-#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id))
-#define DXVA_CONTEXT_CFG(avctx, ctx)            (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.cfg : ctx->dxva2.cfg)
-#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.cfg->ConfigBitstreamRaw : 
ctx->dxva2.cfg->ConfigBitstreamRaw)
-#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : 
ctx->dxva2.cfg->ConfigIntraResidUnsigned)
-#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : 
ctx->dxva2.cfg->ConfigResidDiffAccelerator)
+#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.workaround : ctx->dxva2.workaround)
+#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
+#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.decoder : ctx->dxva2.decoder)
+#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(ff_dxva2_is_d3d11(avctx) ? 
&ctx->d3d11va.report_id : &ctx->dxva2.report_id))
+#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.cfg : ctx->dxva2.cfg)
+#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw)
+#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.cfg->ConfigIntraResidUnsigned : 
ctx->dxva2.cfg->ConfigIntraResidUnsigned)
+#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? 
ctx->d3d11va.cfg->ConfigResidDiffAccelerator : 
ctx->dxva2.cfg->ConfigResidDiffAccelerator)
 #define DXVA_CONTEXT_VALID(avctx, ctx)          (DXVA_CONTEXT_DECODER(avctx, 
ctx) && \
                                                  DXVA_CONTEXT_CFG(avctx, ctx)  
   && \
-                                                 (avctx->pix_fmt == 
AV_PIX_FMT_D3D11VA_VLD || ctx->dxva2.surface_count))
+                                                 (ff_dxva2_is_d3d11(avctx) || 
ctx->dxva2.surface_count))
 #elif CONFIG_DXVA2
 #define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx->dxva2.workaround)
 #define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->dxva2.surface_count)
@@ -113,4 +149,10 @@ int ff_dxva2_common_end_frame(AVCodecContext *, AVFrame *,
                                                   DECODER_BUFFER_DESC *bs,
                                                   DECODER_BUFFER_DESC *slice));
 
+int ff_dxva2_decode_init(AVCodecContext *avctx);
+
+int ff_dxva2_decode_uninit(AVCodecContext *avctx);
+
+int ff_dxva2_is_d3d11(AVCodecContext *avctx);
+
 #endif /* AVCODEC_DXVA2_INTERNAL_H */
diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index a45904963c..9016d61b62 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c
@@ -156,7 +156,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
                                              DECODER_BUFFER_DESC *sc)
 {
     const struct MpegEncContext *s = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     struct dxva2_picture_context *ctx_pic =
         s->current_picture_ptr->hwaccel_picture_private;
     const int is_field = s->picture_structure != PICT_FRAME;
@@ -168,7 +168,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
     unsigned type;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
         if 
(FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                        
D3D11VA_CONTEXT(ctx)->decoder,
@@ -212,7 +212,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         current += size;
     }
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         if 
(FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
 D3D11VA_CONTEXT(ctx)->decoder, type)))
             return -1;
 #endif
@@ -225,7 +225,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         return -1;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
         memset(dsc11, 0, sizeof(*dsc11));
         dsc11->BufferType           = type;
@@ -259,7 +259,7 @@ static int dxva2_mpeg2_start_frame(AVCodecContext *avctx,
                                    av_unused uint32_t size)
 {
     const struct MpegEncContext *s = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     struct dxva2_picture_context *ctx_pic =
         s->current_picture_ptr->hwaccel_picture_private;
 
@@ -323,10 +323,13 @@ AVHWAccel ff_mpeg2_dxva2_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
     .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_mpeg2_start_frame,
     .decode_slice   = dxva2_mpeg2_decode_slice,
     .end_frame      = dxva2_mpeg2_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -336,9 +339,12 @@ AVHWAccel ff_mpeg2_d3d11va_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
     .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_mpeg2_start_frame,
     .decode_slice   = dxva2_mpeg2_decode_slice,
     .end_frame      = dxva2_mpeg2_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c
index 0672c97a48..3f14b5d6a7 100644
--- a/libavcodec/dxva2_vc1.c
+++ b/libavcodec/dxva2_vc1.c
@@ -165,7 +165,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
                                              DECODER_BUFFER_DESC *sc)
 {
     const VC1Context *v = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     const MpegEncContext *s = &v->s;
     struct dxva2_picture_context *ctx_pic = 
s->current_picture_ptr->hwaccel_picture_private;
 
@@ -184,7 +184,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
     unsigned type;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
         if 
(FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
                                                        
D3D11VA_CONTEXT(ctx)->decoder,
@@ -215,7 +215,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         slice->dwSliceBitsInBuffer = 8 * data_size;
     }
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (ff_dxva2_is_d3d11(avctx))
         if 
(FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
 D3D11VA_CONTEXT(ctx)->decoder, type)))
             return -1;
 #endif
@@ -228,7 +228,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext 
*avctx,
         return result;
 
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+    if (ff_dxva2_is_d3d11(avctx)) {
         D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
         memset(dsc11, 0, sizeof(*dsc11));
         dsc11->BufferType           = type;
@@ -261,7 +261,7 @@ static int dxva2_vc1_start_frame(AVCodecContext *avctx,
                                  av_unused uint32_t size)
 {
     const VC1Context *v = avctx->priv_data;
-    AVDXVAContext *ctx = avctx->hwaccel_context;
+    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
     struct dxva2_picture_context *ctx_pic = 
v->s.current_picture_ptr->hwaccel_picture_private;
 
     if (!DXVA_CONTEXT_VALID(avctx, ctx))
@@ -323,10 +323,13 @@ AVHWAccel ff_wmv3_dxva2_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
     .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -336,10 +339,13 @@ AVHWAccel ff_vc1_dxva2_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
     .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -349,10 +355,13 @@ AVHWAccel ff_wmv3_d3d11va_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
     .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
 
@@ -362,9 +371,12 @@ AVHWAccel ff_vc1_d3d11va_hwaccel = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
     .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    .init           = ff_dxva2_decode_init,
+    .uninit         = ff_dxva2_decode_uninit,
     .start_frame    = dxva2_vc1_start_frame,
     .decode_slice   = dxva2_vc1_decode_slice,
     .end_frame      = dxva2_vc1_end_frame,
     .frame_priv_data_size = sizeof(struct dxva2_picture_context),
+    .priv_data_size = sizeof(FFDXVASharedContext),
 };
 #endif
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 427cbe618c..97adaff031 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -717,7 +717,7 @@ static void init_scan_tables(H264Context *h)
 static enum AVPixelFormat get_pixel_format(H264Context *h)
 {
 #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
-                     CONFIG_H264_D3D11VA_HWACCEL + \
+                     CONFIG_H264_D3D11VA_HWACCEL * 2 + \
                      CONFIG_H264_VAAPI_HWACCEL + \
                      (CONFIG_H264_VDA_HWACCEL * 2) + \
                      CONFIG_H264_VDPAU_HWACCEL)
@@ -769,6 +769,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h)
 #endif
 #if CONFIG_H264_D3D11VA_HWACCEL
             *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+            *fmt++ = AV_PIX_FMT_D3D11;
 #endif
 #if CONFIG_H264_VAAPI_HWACCEL
             *fmt++ = AV_PIX_FMT_VAAPI;
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 9d1bab258a..324c8d5928 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -53,6 +53,7 @@ static const HWContextType * const hw_table[] = {
 const char *hw_type_names[] = {
     [AV_HWDEVICE_TYPE_CUDA]   = "cuda",
     [AV_HWDEVICE_TYPE_DXVA2]  = "dxva2",
+    [AV_HWDEVICE_TYPE_D3D11VA] = "d3d11va",
     [AV_HWDEVICE_TYPE_QSV]    = "qsv",
     [AV_HWDEVICE_TYPE_VAAPI]  = "vaapi",
     [AV_HWDEVICE_TYPE_VDPAU]  = "vdpau",
diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c
index a23a4750df..400f8db104 100644
--- a/libavutil/hwcontext_d3d11va.c
+++ b/libavutil/hwcontext_d3d11va.c
@@ -40,7 +40,6 @@
 typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void 
**ppFactory);
 
 typedef struct D3D11VAFramesContext {
-    ID3D11VideoDecoderOutputView **surfaces_internal;
     int              nb_surfaces_used;
 
     ID3D11DeviceContext *d3d11_context;
@@ -67,23 +66,19 @@ static void d3d11va_frames_uninit(AVHWFramesContext *ctx)
     AVD3D11VAFramesContext *frames_hwctx = ctx->hwctx;
     D3D11VAFramesContext *s = ctx->internal->priv;
 
-    av_freep(&s->surfaces_internal);
-
-    if (frames_hwctx->video_decoder)
-        ID3D11VideoDecoder_Release(frames_hwctx->video_decoder);
+    if (frames_hwctx->texture)
+        ID3D11Texture2D_Release(frames_hwctx->texture);
 
     if (s->staging_texture)
         ID3D11Texture2D_Release(s->staging_texture);
 
-    if (s->d3d11_context) {
+    if (s->d3d11_context)
         ID3D11DeviceContext_Release(s->d3d11_context);
-        s->d3d11_context = NULL;
-    }
 }
 
 static void free_surface(void *opaque, uint8_t *data)
 {
-    ID3D11VideoDecoderOutputView_Release((ID3D11VideoDecoderOutputView*)data);
+    ID3D11Texture2D_Release((ID3D11Texture2D *)opaque);
 }
 
 static AVBufferRef *d3d11va_pool_alloc(void *opaque, int size)
@@ -91,29 +86,44 @@ static AVBufferRef *d3d11va_pool_alloc(void *opaque, int 
size)
     AVHWFramesContext        *ctx = (AVHWFramesContext*)opaque;
     D3D11VAFramesContext       *s = ctx->internal->priv;
     AVD3D11VAFramesContext *hwctx = ctx->hwctx;
+    AVD3D11FrameDescriptor *desc;
+    D3D11_TEXTURE2D_DESC texdesc;
+    AVBufferRef *buf;
+
+    if (!hwctx->texture)
+        return NULL;
+
+    ID3D11Texture2D_GetDesc(hwctx->texture, &texdesc);
 
-    if (s->nb_surfaces_used < hwctx->nb_surfaces) {
-        s->nb_surfaces_used++;
-        return av_buffer_create((uint8_t*)s->surfaces_internal[s->nb_surfaces_used - 1],
-                                sizeof(*hwctx->surfaces), free_surface, 0, 0);
+    // Every array slice of the texture can be handed out only once.
+    if (s->nb_surfaces_used >= texdesc.ArraySize)
+        return NULL;
+
+    desc = av_mallocz(sizeof(*desc));
+    if (!desc)
+        return NULL;
+
+    // The buffer size is that of the descriptor itself, not of the pointer.
+    buf = av_buffer_create((uint8_t *)desc, sizeof(*desc), free_surface, hwctx->texture, 0);
+    if (!buf) {
+        av_free(desc);
+        return NULL;
     }
+    // free_surface() releases this reference when the buffer is destroyed.
+    ID3D11Texture2D_AddRef(hwctx->texture);
+
+    desc->texture = hwctx->texture;
+    desc->index = s->nb_surfaces_used++;
 
-    return NULL;
+    return buf;
 }
 
 static int d3d11va_init_pool(AVHWFramesContext *ctx)
 {
-    AVD3D11VAFramesContext *frames_hwctx = ctx->hwctx;
+    AVD3D11VAFramesContext *hwctx = ctx->hwctx;
     AVD3D11VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
     D3D11VAFramesContext              *s = ctx->internal->priv;
 
     int i;
     HRESULT hr;
     D3D11_TEXTURE2D_DESC texDesc = {0};
-    ID3D11Texture2D *p_texture;
-    D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC viewDesc = {0};
-    D3D11_VIDEO_DECODER_DESC decoderDesc;
-    D3D11_VIDEO_DECODER_CONFIG decoderConfig;
     ID3D11Device *d3d11_device;
 
     if (ctx->initial_pool_size <= 0)
@@ -138,11 +148,6 @@ static int d3d11va_init_pool(AVHWFramesContext *ctx)
         return AVERROR(EINVAL);
     }
 
-    s->surfaces_internal = av_mallocz_array(ctx->initial_pool_size,
-                                            sizeof(*s->surfaces_internal));
-    if (!s->surfaces_internal)
-        return AVERROR(ENOMEM);
-
     texDesc.Width = ctx->width;
     texDesc.Height = ctx->height;
     texDesc.MipLevels = 1;
@@ -158,41 +163,25 @@ static int d3d11va_init_pool(AVHWFramesContext *ctx)
         return AVERROR_UNKNOWN;
     }
 
-    hr = ID3D11Device_CreateTexture2D(d3d11_device, &texDesc, NULL, 
&p_texture);
-    if (FAILED(hr)) {
-        av_log(ctx, AV_LOG_ERROR, "Could not create the texture %lx\n", hr);
-        ID3D11Device_Release(d3d11_device);
-        return AVERROR_UNKNOWN;
-    }
-
-    hr = ID3D11VideoDecoder_GetCreationParameters(frames_hwctx->video_decoder, 
&decoderDesc, &decoderConfig);
-    if (FAILED(hr)) {
-        av_log(ctx, AV_LOG_ERROR, "Could not get the decoder config %lx\n", 
hr);
-        ID3D11Texture2D_Release(p_texture);
-        ID3D11Device_Release(d3d11_device);
-        return AVERROR_UNKNOWN;
-    }
+    if (hwctx->texture) {
+        D3D11_TEXTURE2D_DESC texDesc2;
+        ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc2);
 
-    viewDesc.DecodeProfile = decoderDesc.Guid;
-    viewDesc.ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D;
-    for (i=0; i<ctx->initial_pool_size; i++)
-    {
-        hr = 
ID3D11VideoDevice_CreateVideoDecoderOutputView(device_hwctx->video_device,
-                                                            (ID3D11Resource*) 
p_texture,
-                                                            &viewDesc,
-                                                            
(ID3D11VideoDecoderOutputView**) &s->surfaces_internal[i]);
+        if (texDesc.Width != texDesc2.Width ||
+            texDesc.Height != texDesc2.Height ||
+            texDesc.Format != texDesc2.Format) {
+            av_log(ctx, AV_LOG_ERROR, "User-provided texture has mismatching 
parameters\n");
+            ID3D11Device_Release(d3d11_device);
+            return AVERROR(EINVAL);
+        }
+    } else {
+        hr = ID3D11Device_CreateTexture2D(d3d11_device, &texDesc, NULL, 
&hwctx->texture);
         if (FAILED(hr)) {
-            av_log(ctx, AV_LOG_ERROR, "Could not create the decoder output 
%d\n", i);
-            while (--i >= 0) {
-                ID3D11VideoDecoderOutputView_Release(s->surfaces_internal[i]);
-                s->surfaces_internal[i] = NULL;
-            }
-            ID3D11Texture2D_Release(p_texture);
+            av_log(ctx, AV_LOG_ERROR, "Could not create the texture %lx\n", 
hr);
             ID3D11Device_Release(d3d11_device);
             return AVERROR_UNKNOWN;
         }
     }
-    ID3D11Texture2D_Release(p_texture);
 
     texDesc.ArraySize = 1;
     texDesc.Usage = D3D11_USAGE_STAGING;
@@ -205,14 +194,11 @@ static int d3d11va_init_pool(AVHWFramesContext *ctx)
         return AVERROR_UNKNOWN;
     }
 
-    ctx->internal->pool_internal = 
av_buffer_pool_init2(sizeof(*s->surfaces_internal),
+    ctx->internal->pool_internal = 
av_buffer_pool_init2(sizeof(AVD3D11FrameDescriptor),
                                                         ctx, 
d3d11va_pool_alloc, NULL);
     if (!ctx->internal->pool_internal)
         return AVERROR(ENOMEM);
 
-    frames_hwctx->surfaces    = s->surfaces_internal;
-    frames_hwctx->nb_surfaces = ctx->initial_pool_size;
-
     return 0;
 }
 
@@ -234,12 +220,17 @@ static int d3d11va_frames_init(AVHWFramesContext *ctx)
 
 static int d3d11va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
 {
+    AVD3D11FrameDescriptor *desc;
+
     frame->buf[0] = av_buffer_pool_get(ctx->pool);
     if (!frame->buf[0])
         return AVERROR(ENOMEM);
 
-    frame->data[3] = frame->buf[0]->data;
-    frame->format  = AV_PIX_FMT_D3D11VA_VLD;
+    desc = (AVD3D11FrameDescriptor *)frame->buf[0]->data;
+
+    frame->data[0] = (uint8_t *)desc->texture;
+    frame->data[1] = (uint8_t *)(intptr_t)desc->index;
+    frame->format  = AV_PIX_FMT_D3D11;
     frame->width   = ctx->width;
     frame->height  = ctx->height;
 
@@ -267,7 +258,6 @@ static int d3d11va_transfer_get_formats(AVHWFramesContext 
*ctx,
 static int d3d11va_transfer_data(AVHWFramesContext *ctx, AVFrame *dst,
                                  const AVFrame *src)
 {
-    ID3D11VideoDecoderOutputView *surface;
     D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC    surfaceDesc;
     D3D11_TEXTURE2D_DESC dstDesc;
     D3D11_MAPPED_SUBRESOURCE     LockedRect;
@@ -280,22 +270,24 @@ static int d3d11va_transfer_data(AVHWFramesContext *ctx, 
AVFrame *dst,
     uint8_t *surf_data[4]     = { NULL };
     int      surf_linesize[4] = { 0 };
     int i;
+    int index;
 
     int download = !!src->hw_frames_ctx;
 
-    surface = (ID3D11VideoDecoderOutputView*)(download ? src->data[3] : 
dst->data[3]);
+    const AVFrame *frame = download ? src : dst;
+
+    // (The interface types are compatible.)
+    pTexture = (ID3D11Resource *)(ID3D11Texture2D *)frame->data[0];
+    index = (intptr_t)frame->data[1];
 
-    ID3D11VideoDecoderOutputView_GetDesc(surface, &surfaceDesc);
-    ID3D11VideoDecoderOutputView_GetResource(surface, &pTexture);
     ID3D11Texture2D_GetDesc(s->staging_texture, &dstDesc);
 
     WaitForSingleObjectEx(device_hwctx->dev_ctx_mutex, INFINITE, FALSE);
 
     ID3D11DeviceContext_CopySubresourceRegion(s->d3d11_context, 
(ID3D11Resource*)s->staging_texture,
                                               0, 0, 0, 0,
-                                              (ID3D11Resource*)pTexture, 
surfaceDesc.Texture2D.ArraySlice,
+                                              (ID3D11Resource*)pTexture, index,
                                               NULL);
-    ID3D11Resource_Release(pTexture);
 
     hr = ID3D11DeviceContext_Map(s->d3d11_context, 
(ID3D11Resource*)s->staging_texture,
                                  0, download ? D3D11_MAP_READ : 
D3D11_MAP_WRITE, 0, &LockedRect);
@@ -456,5 +448,5 @@ const HWContextType ff_hwcontext_type_d3d11va = {
     .transfer_data_to     = d3d11va_transfer_data,
     .transfer_data_from   = d3d11va_transfer_data,
 
-    .pix_fmts             = (const enum AVPixelFormat[]){ 
AV_PIX_FMT_D3D11VA_VLD, AV_PIX_FMT_NONE },
+    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_D3D11, 
AV_PIX_FMT_NONE },
 };
diff --git a/libavutil/hwcontext_d3d11va.h b/libavutil/hwcontext_d3d11va.h
index 1a649d82ad..a1c1684f47 100644
--- a/libavutil/hwcontext_d3d11va.h
+++ b/libavutil/hwcontext_d3d11va.h
@@ -24,10 +24,8 @@
  * @file
  * An API-specific header for AV_HWDEVICE_TYPE_D3D11VA.
  *
- * Only fixed-size pools are supported.
- *
- * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
- * with the data pointer set to a pointer to ID3D11VideoDecoderOutputView.
+ * Only fixed-size pools are supported by the default pool implementation,
+ * although user-allocated pools can extend this.
  */
 
 #include <d3d11.h>
@@ -47,24 +45,55 @@ typedef struct AVD3D11VADeviceContext {
 } AVD3D11VADeviceContext;
 
 /**
- * This struct is allocated as AVHWFramesContext.hwctx
+ * D3D11 frame descriptor for pool allocation.
+ *
+ * In user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
+ * with the data pointer pointing at an object of this type describing the
+ * planes of the frame.
  */
-typedef struct AVD3D11VAFramesContext {
+typedef struct AVD3D11FrameDescriptor {
+    /**
+     * The texture in which the frame is located. The reference count is
+     * managed by the AVBufferRef, and destroying the reference will release
+     * the interface.
+     *
+     * Normally stored in AVFrame.data[0].
+     */
+    ID3D11Texture2D *texture;
     /**
-     * The surface pool. When an external pool is not provided by the caller,
-     * this will be managed (allocated and filled on init, freed on uninit) by
-     * libavutil.
-     * When it is provided the allocation/deallocation is up to the caller.
+     * The index of the array texture element representing the frame, or 0
+     * if the texture is not an array texture.
+     *
+     * Normally stored in AVFrame.data[1] (cast from intptr_t).
      */
-    ID3D11VideoDecoderOutputView **surfaces;
-    int                         nb_surfaces;
+    intptr_t index;
+} AVD3D11FrameDescriptor;
 
+/**
+ * This struct is allocated as AVHWFramesContext.hwctx
+ */
+typedef struct AVD3D11VAFramesContext {
     /**
-     * Video decoder created by the caller. It must be set before
-     * av_hwframe_ctx_init() is called. When decoding is done it will be
-     * released.
+     * The canonical texture used for pool allocation. If this is set to NULL
+     * on init, the hwframes implementation will allocate and set an array
+     * texture if initial_pool_size > 0.
+     *
+     * The only situation in which the API user should set this is when:
+     * - the user wants to do manual pool allocation (setting
+     *   AVHWFramesContext.pool) instead of letting AVHWFramesContext
+     *   allocate the pool,
+     * - the pool is backed by an array texture, and
+     * - the user wants that texture to be used for decoding.
+     * In that case it must be set before calling av_hwframe_ctx_init().
+     *
+     * Deallocating the AVHWFramesContext will always release this interface,
+     * and it does not matter whether it was user-allocated.
+     *
+     * This is in particular used by the libavcodec D3D11VA hwaccel, which
+     * requires a single array texture. It will create ID3D11VideoDecoderOutputView
+     * objects for each array texture item.
      */
-    ID3D11VideoDecoder  *video_decoder;
+    ID3D11Texture2D *texture;
 } AVD3D11VAFramesContext;
 
 #endif /* AVUTIL_HWCONTEXT_D3D11VA_H */
diff --git a/libavutil/hwcontext_dxva2.h b/libavutil/hwcontext_dxva2.h
index 2290c26066..c8e7a5c978 100644
--- a/libavutil/hwcontext_dxva2.h
+++ b/libavutil/hwcontext_dxva2.h
@@ -65,6 +65,9 @@ typedef struct AVDXVA2FramesContext {
      *
      * If it is non-NULL, libavutil will call IDirectXVideoDecoder_Release() on
      * it just before the internal surface pool is freed.
+     *
+     * This is for convenience only. Some code uses other methods to manage the
+     * decoder reference.
      */
     IDirectXVideoDecoder *decoder_to_release;
 } AVDXVA2FramesContext;
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 815e084155..91a6d677e2 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -1711,6 +1711,10 @@ static const AVPixFmtDescriptor 
av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .flags = AV_PIX_FMT_FLAG_BE  | AV_PIX_FMT_FLAG_PLANAR |
                  AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
+    [AV_PIX_FMT_D3D11] = {
+        .name = "d3d11",
+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
+    },
 };
 #if FF_API_PLUS1_MINUS1
 FF_ENABLE_DEPRECATION_WARNINGS
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 3c670fc9d4..8e57f562c5 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -205,7 +205,7 @@ enum AVPixelFormat {
      */
     AV_PIX_FMT_MMAL,
 
-    AV_PIX_FMT_D3D11VA_VLD,  ///< HW decoding through Direct3D11, 
Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
+    AV_PIX_FMT_D3D11VA_VLD,  ///< HW decoding through Direct3D11 via old API, 
Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
 
     /**
      * HW acceleration through CUDA. data[i] contain CUdeviceptr pointers
@@ -234,6 +234,8 @@ enum AVPixelFormat {
     AV_PIX_FMT_GRAY12BE,  ///<        Y        , 12bpp, big-endian
     AV_PIX_FMT_GRAY12LE,  ///<        Y        , 12bpp, little-endian
 
+    AV_PIX_FMT_D3D11,     ///< HW decoding through Direct3D11 via new API, 
Picture.data[0] contains a ID3D11Texture2D pointer, and data[1] contains the 
texture array index of the frame as intptr_t if the ID3D11Texture2D is an array 
texture (or 0 if it's a normal texture)
+
     AV_PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you 
want to link with shared libav* because the number of formats might differ 
between versions
 };
 
-- 
2.11.0

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 7/7] [WIP] dxva/d3d: add support for new hwaccel API

Reply via email to