Araz Iusubov: > -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-boun...@ffmpeg.org> On Behalf Of Araz > Iusubov > Sent: Friday, May 16, 2025 5:44 PM > To: ffmpeg-devel@ffmpeg.org > Cc: Araz Iusubov <primeadv...@gmail.com> > Subject: [FFmpeg-devel] [PATCH, v2] avcodec/d3d12va_encode: texture array > support for HEVC > > This patch adds support for the texture array feature used by AMD boards in > the > D3D12 HEVC encoder. > In texture array mode, a single texture array is shared for all reference and > reconstructed pictures using different subresources. > The implementation ensures compatibility and has been successfully tested on > AMD, Intel, and NVIDIA GPUs. > > v2 updates: > 1. The reference to MaxL1ReferencesForB for the H.264 codec was updated to > use the corresponding H.264 field instead of the HEVC one. > 2. Max_subresource_array_size calculation was adjusted by removing the > D3D12VA_VIDEO_ENC_ASYNC_DEPTH offset. > > --- > libavcodec/d3d12va_encode.c | 241 +++++++++++++++++++++++++------ > libavcodec/d3d12va_encode.h | 29 ++++ > libavcodec/d3d12va_encode_hevc.c | 5 +- > 3 files changed, 231 insertions(+), 44 deletions(-) > > diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c index > 4d738200fe..d428ad1fd8 100644 > --- a/libavcodec/d3d12va_encode.c > +++ b/libavcodec/d3d12va_encode.c > @@ -264,6 +264,11 @@ static int d3d12va_encode_issue(AVCodecContext > *avctx, > > av_log(avctx, AV_LOG_DEBUG, "Input surface is %p.\n", pic->input_surface- > >texture); > > + if (ctx->is_texture_array) { > + base_pic->recon_image->data[0] = ctx->texture_array_frame; > + pic->subresource_index = (ctx->subresource_used_index++) % ctx- > >max_subresource_array_size; > + } > + > pic->recon_surface = (AVD3D12VAFrame *)base_pic->recon_image->data[0]; > av_log(avctx, AV_LOG_DEBUG, "Recon surface is %p.\n", > pic->recon_surface->texture); @@ -325,11 +330,28 @@ static int > d3d12va_encode_issue(AVCodecContext *avctx, > goto fail; > } > > + if 
(ctx->is_texture_array) { > + d3d12_refs.pSubresources = av_calloc(d3d12_refs.NumTexture2Ds, > + > sizeof(*d3d12_refs.pSubresources)); > + if (!d3d12_refs.pSubresources) { > + err = AVERROR(ENOMEM); > + goto fail; > + } > + } > + > i = 0; > - for (j = 0; j < base_pic->nb_refs[0]; j++) > - d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic- > >refs[0][j]->priv)->recon_surface->texture; > - for (j = 0; j < base_pic->nb_refs[1]; j++) > - d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic- > >refs[1][j]->priv)->recon_surface->texture; > + for (j = 0; j < base_pic->nb_refs[0]; j++) { > + d3d12_refs.ppTexture2Ds[i] = ((D3D12VAEncodePicture *)base_pic- > >refs[0][j]->priv)->recon_surface->texture; > + if (ctx->is_texture_array) > + d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture > *)base_pic- > >refs[0][j]->priv)->subresource_index; > + i++; > + } > + for (j = 0; j < base_pic->nb_refs[1]; j++) { > + d3d12_refs.ppTexture2Ds[i] = ((D3D12VAEncodePicture *)base_pic- > >refs[1][j]->priv)->recon_surface->texture; > + if (ctx->is_texture_array) > + d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture > *)base_pic- > >refs[1][j]->priv)->subresource_index; > + i++; > + } > } > > input_args.PictureControlDesc.IntraRefreshFrameIndex = 0; @@ -343,7 > +365,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx, > output_args.Bitstream.pBuffer = > pic->output_buffer; > output_args.Bitstream.FrameStartOffset = pic- > >aligned_header_size; > output_args.ReconstructedPicture.pReconstructedPicture = pic- > >recon_surface->texture; > - output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0; > + if (ctx->is_texture_array) { > + output_args.ReconstructedPicture.ReconstructedPictureSubresource = > pic- > >subresource_index; > + } else { > + output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0; > + } > output_args.EncoderOutputMetadata.pBuffer = pic- > >encoded_metadata; > output_args.EncoderOutputMetadata.Offset = 0; > > @@ 
-381,35 +407,87 @@ static int d3d12va_encode_issue(AVCodecContext > *avctx, > }, \ > } > > +#define TRANSITION_BARRIER_SUBRESOURCE(res, subres,before, after) \ > + (D3D12_RESOURCE_BARRIER) { \ > + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, \ > + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, \ > + .Transition = { \ > + .pResource = res, \ > + .Subresource = subres, \ > + .StateBefore = before, \ > + .StateAfter = after, \ > + }, \ > + } > + > barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture, > D3D12_RESOURCE_STATE_COMMON, > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ); > barriers[1] = TRANSITION_BARRIER(pic->output_buffer, > D3D12_RESOURCE_STATE_COMMON, > > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > - barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture, > + barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata, > D3D12_RESOURCE_STATE_COMMON, > > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > - barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata, > + barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata, > D3D12_RESOURCE_STATE_COMMON, > > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > - barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata, > - D3D12_RESOURCE_STATE_COMMON, > - > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > - > - ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers); > - > - if (d3d12_refs.NumTexture2Ds) { > - D3D12_RESOURCE_BARRIER refs_barriers[3]; > - > - for (i = 0; i < d3d12_refs.NumTexture2Ds; i++) > - refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i], > - > D3D12_RESOURCE_STATE_COMMON, > - > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ); > - > - ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, > d3d12_refs.NumTexture2Ds, > - refs_barriers); > + ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, > + barriers); > + > + //set transit barriers for reference pic and recon pic > + int barriers_ref_index = 0; > + D3D12_RESOURCE_BARRIER *barriers_ref = NULL; > + if (ctx->is_texture_array) { > + 
barriers_ref = av_calloc(ctx->max_subresource_array_size * ctx- > >plane_count, > + sizeof(D3D12_RESOURCE_BARRIER)); > + } else { > + barriers_ref = > av_calloc(MAX_DPB_SIZE,sizeof(D3D12_RESOURCE_BARRIER)); > + } > + > + if (ctx->is_texture_array) { > + // In Texture array mode, the D3D12 uses the same texture array for > all the > input > + // reference pics in ppTexture2Ds and also for the > pReconstructedPicture > output allocations, > + //just different subresources. > + D3D12_RESOURCE_DESC references_tex_array_desc = { 0 }; > + > + pic->recon_surface->texture->lpVtbl->GetDesc(pic->recon_surface->textu > + re, &references_tex_array_desc); > + > + for (uint32_t reference_subresource = 0; reference_subresource < > references_tex_array_desc.DepthOrArraySize; > + reference_subresource++) { > + > + //D3D12 DecomposeSubresource > + uint32_t mip_slice, plane_slice, array_slice, array_size; > + array_size = references_tex_array_desc.DepthOrArraySize; > + mip_slice = reference_subresource % > references_tex_array_desc.MipLevels; > + array_slice = (reference_subresource / > + references_tex_array_desc.MipLevels) % array_size; > + > + for (plane_slice = 0; plane_slice < ctx->plane_count; > plane_slice++) { > + //Calculate the subresource index > + uint32_t planeOutputSubresource = mip_slice + array_slice * > references_tex_array_desc.MipLevels + > + plane_slice * > + references_tex_array_desc.MipLevels * array_size; > + > + if (reference_subresource == pic->subresource_index) { > + barriers_ref[barriers_ref_index++] = > TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, > planeOutputSubresource, > + D3D12_RESOURCE_STATE_COMMON, > + > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > + } else { > + barriers_ref[barriers_ref_index++] = > TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, > planeOutputSubresource, > + D3D12_RESOURCE_STATE_COMMON, > + > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ); > + } > + } > + } > + } else { > + barriers_ref[barriers_ref_index++] = 
TRANSITION_BARRIER(pic- > >recon_surface->texture, > + D3D12_RESOURCE_STATE_COMMON, > + > + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE); > + > + if (d3d12_refs.NumTexture2Ds) { > + for (i = 0; i < d3d12_refs.NumTexture2Ds; i++) > + barriers_ref[barriers_ref_index++] = > TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i], > + > D3D12_RESOURCE_STATE_COMMON, > + > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ); > + } > } > + ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, > + barriers_ref_index, barriers_ref); > > ID3D12VideoEncodeCommandList2_EncodeFrame(cmd_list, ctx->encoder, ctx- > >encoder_heap, > &input_args, &output_args); @@ > -422,16 +500,15 @@ > static int d3d12va_encode_issue(AVCodecContext *avctx, > > > ID3D12VideoEncodeCommandList2_ResolveEncoderOutputMetadata(cmd_list, > &input_metadata, &output_metadata); > > - if (d3d12_refs.NumTexture2Ds) { > - D3D12_RESOURCE_BARRIER refs_barriers[3]; > - > - for (i = 0; i < d3d12_refs.NumTexture2Ds; i++) > - refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i], > - > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ, > - > D3D12_RESOURCE_STATE_COMMON); > - > - ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, > d3d12_refs.NumTexture2Ds, > - refs_barriers); > + //swap the barriers_ref transition state > + if (barriers_ref_index > 0) { > + for (i = 0; i < barriers_ref_index; i++) { > + D3D12_RESOURCE_STATES temp_statue = > barriers_ref[i].Transition.StateBefore; > + barriers_ref[i].Transition.StateBefore = > barriers_ref[i].Transition.StateAfter; > + barriers_ref[i].Transition.StateAfter = temp_statue; > + } > + ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, > barriers_ref_index, > + barriers_ref); > } > > barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture, > @@ -440,17 +517,14 @@ static int d3d12va_encode_issue(AVCodecContext > *avctx, > barriers[1] = TRANSITION_BARRIER(pic->output_buffer, > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, > D3D12_RESOURCE_STATE_COMMON); > - barriers[2] = 
TRANSITION_BARRIER(pic->recon_surface->texture, > - D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, > - D3D12_RESOURCE_STATE_COMMON); > - barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata, > + barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata, > D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ, > D3D12_RESOURCE_STATE_COMMON); > - barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata, > + barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata, > D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE, > D3D12_RESOURCE_STATE_COMMON); > > - ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers); > + ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, > + barriers); > > hr = ID3D12VideoEncodeCommandList2_Close(cmd_list); > if (FAILED(hr)) { > @@ -489,6 +563,14 @@ static int d3d12va_encode_issue(AVCodecContext > *avctx, > if (d3d12_refs.ppTexture2Ds) > av_freep(&d3d12_refs.ppTexture2Ds); > > + if (ctx->is_texture_array) { > + if (d3d12_refs.pSubresources) > + av_freep(&d3d12_refs.pSubresources); > + } > + > + if (barriers_ref) > + av_freep(&barriers_ref); > + > return 0; > > fail: > @@ -498,6 +580,14 @@ fail: > if (d3d12_refs.ppTexture2Ds) > av_freep(&d3d12_refs.ppTexture2Ds); > > + if (ctx->is_texture_array) { > + if (d3d12_refs.pSubresources) > + av_freep(&d3d12_refs.pSubresources); > + } > + > + if (barriers_ref) > + av_freep(&barriers_ref); > + > if (ctx->codec->free_picture_params) > ctx->codec->free_picture_params(pic); > > @@ -1088,13 +1178,15 @@ static int > d3d12va_encode_init_gop_structure(AVCodecContext *avctx) > switch (ctx->codec->d3d12_codec) { > case D3D12_VIDEO_ENCODER_CODEC_H264: > ref_l0 = FFMIN(support.PictureSupport.pH264Support- > >MaxL0ReferencesForP, > - support.PictureSupport.pH264Support- > >MaxL1ReferencesForB); > + support.PictureSupport.pH264Support- > >MaxL1ReferencesForB ? 
> + > + support.PictureSupport.pH264Support->MaxL1ReferencesForB : UINT_MAX); > ref_l1 = > support.PictureSupport.pH264Support->MaxL1ReferencesForB; > break; > > case D3D12_VIDEO_ENCODER_CODEC_HEVC: > ref_l0 = FFMIN(support.PictureSupport.pHEVCSupport- > >MaxL0ReferencesForP, > - support.PictureSupport.pHEVCSupport- > >MaxL1ReferencesForB); > + support.PictureSupport.pHEVCSupport- > >MaxL1ReferencesForB ? > + > + support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB : UINT_MAX); > ref_l1 = > support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB; > break; > > @@ -1336,6 +1428,47 @@ fail: > return err; > } > > +static int d3d12va_create_texture_array(AVHWFramesContext *ctx, > +D3D12VAEncodeContext *encode_context) { > + AVD3D12VAFramesContext *hwctx = ctx->hwctx; > + AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx->hwctx; > + > + AVD3D12VAFrame *frame; > + D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; > + > + encode_context->max_subresource_array_size = MAX_DPB_SIZE + 1; > + > + D3D12_RESOURCE_DESC desc = { > + .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D, > + .Alignment = 0, > + .Width = ctx->width, > + .Height = ctx->height, > + .DepthOrArraySize = encode_context->max_subresource_array_size, > + .MipLevels = 1, > + .Format = hwctx->format, > + .SampleDesc = {.Count = 1, .Quality = 0 }, > + .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, > + .Flags = hwctx->flags, > + }; > + > + frame = av_mallocz(sizeof(AVD3D12VAFrame)); > + if (!frame) > + return AVERROR(ENOMEM); > + > + if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device, > &props, D3D12_HEAP_FLAG_NONE, &desc, > + D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void > **)&frame->texture))) { > + av_log(ctx, AV_LOG_ERROR, "Could not create the texture\n"); > + return AVERROR(EINVAL); > + } > + > + ID3D12Device_CreateFence(device_hwctx->device, 0, > D3D12_FENCE_FLAG_NONE, > + &IID_ID3D12Fence, (void > + **)&frame->sync_ctx.fence); > + > + frame->sync_ctx.event 
= CreateEvent(NULL, FALSE, FALSE, NULL); > + encode_context->texture_array_frame = frame; > + return 0; > +} > + > static int d3d12va_encode_create_recon_frames(AVCodecContext *avctx) { > FFHWBaseEncodeContext *base_ctx = avctx->priv_data; @@ -1394,6 +1527,7 > @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) > FFHWBaseEncodeContext *base_ctx = avctx->priv_data; > D3D12VAEncodeContext *ctx = avctx->priv_data; > D3D12_FEATURE_DATA_VIDEO_FEATURE_AREA_SUPPORT support = { 0 }; > + D3D12_FEATURE_DATA_FORMAT_INFO format_info = {0}; > int err; > HRESULT hr; > > @@ -1429,6 +1563,15 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) > goto fail; > } > > + format_info.Format = ((AVD3D12VAFramesContext*)base_ctx->input_frames- > >hwctx)->format; > + if (FAILED(ID3D12VideoDevice_CheckFeatureSupport(ctx->hwctx->device, > D3D12_FEATURE_FORMAT_INFO, > + &format_info, sizeof(format_info)))) { > + av_log(avctx, AV_LOG_ERROR, "Failed to query format plane count: > 0x%x\n", hr); > + err = AVERROR_EXTERNAL; > + goto fail; > + } > + ctx->plane_count = format_info.PlaneCount; > + > err = d3d12va_encode_set_profile(avctx); > if (err < 0) > goto fail; > @@ -1485,6 +1628,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) > goto fail; > } > > + if (ctx->is_texture_array) { > + d3d12va_create_texture_array(base_ctx->recon_frames, avctx- > >priv_data); > + } > + > base_ctx->output_delay = base_ctx->b_per_p; > base_ctx->decode_delay = base_ctx->max_b_depth; > > @@ -1528,6 +1675,18 @@ int ff_d3d12va_encode_close(AVCodecContext > *avctx) > > av_buffer_pool_uninit(&ctx->output_buffer_pool); > > + if (ctx->is_texture_array) { > + ID3D12Resource *pResource = ctx->texture_array_frame->texture; > + if (pResource) { > + D3D12_OBJECT_RELEASE(pResource); > + ctx->texture_array_frame->texture = NULL; > + } > + D3D12_OBJECT_RELEASE(ctx->texture_array_frame->sync_ctx.fence); > + if (ctx->texture_array_frame->sync_ctx.event) > + CloseHandle(ctx->texture_array_frame->sync_ctx.event); > + 
av_free(ctx->texture_array_frame); > + } > + > D3D12_OBJECT_RELEASE(ctx->command_list); > D3D12_OBJECT_RELEASE(ctx->command_queue); > > diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h index > 3b0b8153d5..fc31857f1a 100644 > --- a/libavcodec/d3d12va_encode.h > +++ b/libavcodec/d3d12va_encode.h > @@ -52,6 +52,8 @@ typedef struct D3D12VAEncodePicture { > ID3D12Resource *encoded_metadata; > ID3D12Resource *resolved_metadata; > > + int subresource_index; > + > D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl; > > int fence_value; > @@ -189,6 +191,33 @@ typedef struct D3D12VAEncodeContext { > */ > AVBufferPool *output_buffer_pool; > > + /** > + * Flag indicates if the HW is texture array mode. > + */ > + int is_texture_array; > + > + /** > + * In texture array mode, the D3D12 uses the same texture array for all > the > input > + * reference pics in ppTexture2Ds and also for the pReconstructedPicture > output > + * allocations, just different subresources. > + */ > + AVD3D12VAFrame *texture_array_frame; > + > + /** > + * The max number of subresources in the texture array. > + */ > + int max_subresource_array_size; > + > + /** > + * The used subresource index for pic in the texture array. > + */ > + int subresource_used_index; > + > + /** > + * The number of planes in the input DXGI FORMAT . > + */ > + int plane_count; > + > /** > * D3D12 video encoder. 
> */ > diff --git a/libavcodec/d3d12va_encode_hevc.c > b/libavcodec/d3d12va_encode_hevc.c > index 938ba01f54..7e1d973f7e 100644 > --- a/libavcodec/d3d12va_encode_hevc.c > +++ b/libavcodec/d3d12va_encode_hevc.c > @@ -280,9 +280,8 @@ static int > d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx) > } > > if (support.SupportFlags & > D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RECONSTRUCTED_FRAMES_REQUIR > E_TEXTURE_ARRAYS) { > - av_log(avctx, AV_LOG_ERROR, "D3D12 video encode on this device > requires > texture array support, " > - "but it's not implemented.\n"); > - return AVERROR_PATCHWELCOME; > + ctx->is_texture_array = 1; > + av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device > + uses texture array mode.\n"); > } > > desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format); > --
Hi Araz, Could you please fix the compile error that occurs with the GCC compiler and resubmit a new version of the patch? https://patchwork.ffmpeg.org/project/ffmpeg/patch/20250516094356.372-1-primeadv...@gmail.com/ Tong _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".