PR #21295 opened by Steven Xiao (younengxiao) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295.patch
This pull request implements ROI (Region of Interest) encoding support for D3D12VA hardware encoders, enabling spatially-adaptive quality control for H.264, HEVC, and AV1 encoders. Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support during initialization to check whether the hardware supports QP delta. If delta QP is supported, then process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data and generate delta QP maps for each frame. Sample command line: ``` ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4 ``` >From d0a539d9717a778b2654d32fc32e554ae557a4d5 Mon Sep 17 00:00:00 2001 From: stevxiao <[email protected]> Date: Wed, 24 Dec 2025 23:43:26 -0500 Subject: [PATCH] avcodec/d3d12va_encode: add Region of Interest (ROI) support This commit implements ROI (Region of Interest) encoding support for D3D12VA hardware encoders, enabling spatially-adaptive quality control for H.264, HEVC, and AV1 encoders. Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support during initialization to check whether the hardware supports QP delta. If QP delta is supported, then process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data and generate delta QP maps for each frame. 
Sample command line: ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4 --- libavcodec/d3d12va_encode.c | 234 ++++++++++++++++++++++++++++++++++++ libavcodec/d3d12va_encode.h | 10 ++ 2 files changed, 244 insertions(+) diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c index de95518be5..e070a0b650 100644 --- a/libavcodec/d3d12va_encode.c +++ b/libavcodec/d3d12va_encode.c @@ -140,6 +140,93 @@ static int d3d12va_encode_wait(AVCodecContext *avctx, return 0; } +static int d3d12va_encode_setup_roi(AVCodecContext *avctx, + D3D12VAEncodePicture *pic, + const uint8_t *data, size_t size) +{ + D3D12VAEncodeContext *ctx = avctx->priv_data; + const AVRegionOfInterest *roi; + uint32_t roi_size; + int nb_roi, i; + int block_width, block_height; + int block_size, qp_range; + int8_t *qp_map; + + // Use the QP map region size reported by the driver + block_size = ctx->qp_map_region_size; + + // Determine QP range based on codec + switch (ctx->codec->d3d12_codec) { + case D3D12_VIDEO_ENCODER_CODEC_H264: + case D3D12_VIDEO_ENCODER_CODEC_HEVC: + qp_range = 51; + break; +#if CONFIG_AV1_D3D12VA_ENCODER + case D3D12_VIDEO_ENCODER_CODEC_AV1: + qp_range = 255; + break; +#endif + default: + av_log(avctx, AV_LOG_ERROR, "Unsupported codec for ROI.\n"); + return AVERROR(EINVAL); + } + + // Calculate map dimensions using ceil division as required by D3D12 + block_width = (avctx->width + block_size - 1) / block_size; + block_height = (avctx->height + block_size - 1) / block_size; + + // Allocate QP map (initialized to 0 for non-ROI areas) + qp_map = av_calloc(block_width * block_height, sizeof(*qp_map)); + if (!qp_map) + return AVERROR(ENOMEM); + + // Process ROI regions + roi = (const AVRegionOfInterest*)data; + roi_size = roi->self_size; + av_assert0(roi_size && size % roi_size == 0); + nb_roi = size / roi_size; + + // Iterate in reverse for priority (first region in array takes 
priority on overlap) + for (i = nb_roi - 1; i >= 0; i--) { + int startx, endx, starty, endy; + int delta_qp; + int x, y; + + roi = (const AVRegionOfInterest*)(data + roi_size * i); + + // Convert pixel coordinates to block coordinates + starty = FFMIN(block_height, roi->top / block_size); + endy = FFMIN(block_height, (roi->bottom + block_size - 1) / block_size); + startx = FFMIN(block_width, roi->left / block_size); + endx = FFMIN(block_width, (roi->right + block_size - 1) / block_size); + + if (roi->qoffset.den == 0) { + av_free(qp_map); + av_log(avctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n"); + return AVERROR(EINVAL); + } + + // Convert qoffset to delta QP + delta_qp = roi->qoffset.num * qp_range / roi->qoffset.den; + delta_qp = av_clip_int8(delta_qp); + + av_log(avctx, AV_LOG_DEBUG, "ROI: (%d,%d)-(%d,%d) -> %+d.\n", + roi->top, roi->left, roi->bottom, roi->right, delta_qp); + + // Fill QP map for this ROI region + for (y = starty; y < endy; y++) { + for (x = startx; x < endx; x++) { + qp_map[x + y * block_width] = delta_qp; + } + } + } + + pic->qp_map = qp_map; + pic->qp_map_size = block_width * block_height; + + return 0; +} + static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx, D3D12VAEncodePicture *pic) { @@ -366,6 +453,49 @@ static int d3d12va_encode_issue(AVCodecContext *avctx, } } + // Process ROI side data if present and supported + + AVFrameSideData *sd = av_frame_get_side_data(base_pic->input_image, + AV_FRAME_DATA_REGIONS_OF_INTEREST); + if (sd && base_ctx->roi_allowed) { + err = d3d12va_encode_setup_roi(avctx, pic, sd->data, sd->size); + if (err < 0) + goto fail; + + // Enable delta QP flag in rate control only if supported + input_args.SequenceControlDesc.RateControl.Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP; + + // Set QP map in codec-specific picture control data + switch (ctx->codec->d3d12_codec) { + case D3D12_VIDEO_ENCODER_CODEC_H264: + if (pic->pic_ctl.pH264PicData) { + 
pic->pic_ctl.pH264PicData->QPMapValuesCount = pic->qp_map_size; + pic->pic_ctl.pH264PicData->pRateControlQPMap = pic->qp_map; + } + break; + case D3D12_VIDEO_ENCODER_CODEC_HEVC: + if (pic->pic_ctl.pHEVCPicData) { + pic->pic_ctl.pHEVCPicData->QPMapValuesCount = pic->qp_map_size; + pic->pic_ctl.pHEVCPicData->pRateControlQPMap = pic->qp_map; + } + break; +#if CONFIG_AV1_D3D12VA_ENCODER + case D3D12_VIDEO_ENCODER_CODEC_AV1: + if (pic->pic_ctl.pAV1PicData) { + pic->pic_ctl.pAV1PicData->QPMapValuesCount = pic->qp_map_size; + pic->pic_ctl.pAV1PicData->pRateControlQPMap = (INT16 *)pic->qp_map; + } + break; +#endif + default: + break; + } + + av_log(avctx, AV_LOG_DEBUG, "ROI delta QP map created with %d blocks (region size: %d pixels).\n", + pic->qp_map_size, ctx->qp_map_region_size); + } + + input_args.PictureControlDesc.IntraRefreshFrameIndex = ctx->intra_refresh_frame_index; if (base_pic->is_reference) input_args.PictureControlDesc.Flags |= D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAG_USED_AS_REFERENCE_PICTURE; @@ -669,6 +799,9 @@ static int d3d12va_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic if (ctx->codec->free_picture_params) ctx->codec->free_picture_params(priv); + // Free ROI QP map if allocated + av_freep(&priv->qp_map); + return 0; } @@ -1318,6 +1451,103 @@ static int d3d12va_encode_init_gop_structure(AVCodecContext *avctx) return 0; } +static int d3d12va_encode_init_roi(AVCodecContext* avctx) +{ + FFHWBaseEncodeContext *base_ctx = avctx->priv_data; + D3D12VAEncodeContext *ctx = avctx->priv_data; + AVD3D12VAFramesContext *frames_hwctx = base_ctx->input_frames->hwctx; + HRESULT hr; + + D3D12_VIDEO_ENCODER_PROFILE_DESC profile = { 0 }; + D3D12_VIDEO_ENCODER_PROFILE_H264 h264_profile = D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN; + D3D12_VIDEO_ENCODER_PROFILE_HEVC hevc_profile = D3D12_VIDEO_ENCODER_PROFILE_HEVC_MAIN; +#if CONFIG_AV1_D3D12VA_ENCODER + D3D12_VIDEO_ENCODER_AV1_PROFILE av1_profile = D3D12_VIDEO_ENCODER_AV1_PROFILE_MAIN; +#endif + + 
D3D12_VIDEO_ENCODER_LEVEL_SETTING level = { 0 }; + D3D12_VIDEO_ENCODER_LEVELS_H264 h264_level = { 0 }; + D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC hevc_level = { 0 }; +#if CONFIG_AV1_D3D12VA_ENCODER + D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS av1_level = { 0 }; +#endif + + // Initialize to defaults + ctx->qp_map_region_size = 0; + base_ctx->roi_allowed = 0; + + switch (ctx->codec->d3d12_codec) { + case D3D12_VIDEO_ENCODER_CODEC_H264: + profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_PROFILE_H264); + profile.pH264Profile = &h264_profile; + level.DataSize = sizeof(D3D12_VIDEO_ENCODER_LEVELS_H264); + level.pH264LevelSetting = &h264_level; + break; + case D3D12_VIDEO_ENCODER_CODEC_HEVC: + profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_PROFILE_HEVC); + profile.pHEVCProfile = &hevc_profile; + level.DataSize = sizeof(D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC); + level.pHEVCLevelSetting = &hevc_level; + break; +#if CONFIG_AV1_D3D12VA_ENCODER + case D3D12_VIDEO_ENCODER_CODEC_AV1: + profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_AV1_PROFILE); + profile.pAV1Profile = &av1_profile; + level.DataSize = sizeof(D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS); + level.pAV1LevelSetting = &av1_level; + break; +#endif + default: + av_assert0(0); + } + + // Query encoder support to check if delta QP works with current configuration + D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT1 support = { + .NodeIndex = 0, + .Codec = ctx->codec->d3d12_codec, + .InputFormat = frames_hwctx->format, + .RateControl = ctx->rc, + .IntraRefresh = ctx->intra_refresh.Mode, + .SubregionFrameEncoding = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME, + .ResolutionsListCount = 1, + .pResolutionList = &ctx->resolution, + .CodecGopSequence = ctx->gop, + .MaxReferenceFramesInDPB = MAX_DPB_SIZE - 1, + .CodecConfiguration = ctx->codec_conf, + .SuggestedProfile = profile, + .SuggestedLevel = level, + .pResolutionDependentSupport = &ctx->res_limits, +#if CONFIG_AV1_D3D12VA_ENCODER + 
.SubregionFrameEncodingData.pTilesPartition_AV1 = ctx->subregions_layout.pTilesPartition_AV1, +#endif + }; + + hr = ID3D12VideoDevice3_CheckFeatureSupport(ctx->video_device3, + D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1, + &support, sizeof(support)); + if (FAILED(hr)) { + av_log(avctx, AV_LOG_WARNING, "Failed to query encoder support for ROI, disabling ROI.\n"); + return 0; + } + + // Check if the configuration with DELTA_QP is supported + if ((support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_GENERAL_SUPPORT_OK) && + (support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE)) { + base_ctx->roi_allowed = 1; + // Store the QP map region size from resolution limits + ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize; + + av_log(avctx, AV_LOG_VERBOSE, "ROI encoding is supported via delta QP " + "(QP map region size: %d pixels).\n", ctx->qp_map_region_size); + } else { + av_log(avctx, AV_LOG_VERBOSE, "ROI encoding not supported by hardware for current rate control mode " + "(SupportFlags: 0x%x, ValidationFlags: 0x%x).\n", + support.SupportFlags, support.ValidationFlags); + } + + return 0; +} + static int d3d12va_encode_init_intra_refresh(AVCodecContext *avctx) { FFHWBaseEncodeContext *base_ctx = avctx->priv_data; @@ -1770,6 +2000,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) if (err < 0) goto fail; + err = d3d12va_encode_init_roi(avctx); + if (err < 0) + goto fail; + if (ctx->codec->init_sequence_params) { err = ctx->codec->init_sequence_params(avctx); if (err < 0) { diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h index fcb97210b3..aec1abdc4f 100644 --- a/libavcodec/d3d12va_encode.h +++ b/libavcodec/d3d12va_encode.h @@ -57,6 +57,10 @@ typedef struct D3D12VAEncodePicture { D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl; int fence_value; + + // ROI delta QP map + int8_t *qp_map; + int qp_map_size; } D3D12VAEncodePicture; typedef struct D3D12VAEncodeProfile { @@ -282,6 +286,12 @@ typedef 
struct D3D12VAEncodeContext { */ D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE me_precision; + + /** + * QP map region pixel size (block size for QP map) + */ + int qp_map_region_size; + } D3D12VAEncodeContext; typedef struct D3D12VAEncodeType { -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
