PR #21295 opened by Steven Xiao (younengxiao)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295.patch

This pull request implements ROI (Region of Interest) encoding support for 
D3D12VA hardware encoders, enabling spatially-adaptive quality control for 
H.264, HEVC, and AV1 encoders.
Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support 
during initialization to check whether the hardware supports delta QP. If 
delta QP is supported, process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data 
and generate a delta QP map for each frame.

Sample command line:
```
ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf 
addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
```


>From d0a539d9717a778b2654d32fc32e554ae557a4d5 Mon Sep 17 00:00:00 2001
From: stevxiao <[email protected]>
Date: Wed, 24 Dec 2025 23:43:26 -0500
Subject: [PATCH] avcodec/d3d12va_encode: add Region of Interest (ROI) support

This commit implements ROI (Region of Interest) encoding support for D3D12VA 
hardware encoders, enabling spatially-adaptive quality control for H.264, HEVC, 
and AV1 encoders.
Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support 
during initialization to check whether the hardware supports delta QP. If 
delta QP is supported, process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data 
and generate a delta QP map for each frame.

Sample command line:
ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf 
addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
---
 libavcodec/d3d12va_encode.c | 234 ++++++++++++++++++++++++++++++++++++
 libavcodec/d3d12va_encode.h |  10 ++
 2 files changed, 244 insertions(+)

diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index de95518be5..e070a0b650 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -140,6 +140,93 @@ static int d3d12va_encode_wait(AVCodecContext *avctx,
     return 0;
 }
 
+/**
+ * Build the per-block delta QP map for a picture from ROI side data.
+ *
+ * @param avctx  codec context; its priv_data supplies the codec type and the
+ *               QP map region size
+ * @param pic    picture that receives the map in qp_map / qp_map_size; the
+ *               buffer is allocated here and left for the picture to free
+ * @param data   array of AVRegionOfInterest entries
+ * @param size   size of data in bytes, a multiple of the entries' self_size
+ * @return 0 on success, a negative AVERROR code on failure (nothing is stored
+ *         in pic in that case)
+ */
+static int d3d12va_encode_setup_roi(AVCodecContext *avctx,
+                                    D3D12VAEncodePicture *pic,
+                                    const uint8_t *data, size_t size)
+{
+    D3D12VAEncodeContext *ctx = avctx->priv_data;
+    const AVRegionOfInterest *roi = (const AVRegionOfInterest *)data;
+    // Use the QP map region size stored at init time
+    const int block_size = ctx->qp_map_region_size;
+    int block_width, block_height, qp_range, nb_roi;
+    size_t elem_size = sizeof(int8_t);
+    uint32_t roi_size;
+    void *qp_map;
+
+    // A non-positive region size would divide by zero below
+    if (block_size <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid QP map region size %d.\n", block_size);
+        return AVERROR(EINVAL);
+    }
+
+    // Delta QP range and map element width depend on the codec
+    switch (ctx->codec->d3d12_codec) {
+        case D3D12_VIDEO_ENCODER_CODEC_H264:
+        case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+            qp_range = 51;
+            break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+        case D3D12_VIDEO_ENCODER_CODEC_AV1:
+            qp_range  = 255;
+            // The AV1 picture data takes the map as INT16 *, so the buffer
+            // must hold 16-bit entries or the driver reads past its end
+            elem_size = sizeof(int16_t);
+            break;
+#endif
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported codec for ROI.\n");
+            return AVERROR(EINVAL);
+    }
+
+    roi_size = roi->self_size;
+    av_assert0(roi_size && size % roi_size == 0);
+    nb_roi = size / roi_size;
+
+    // Calculate map dimensions using ceil division as required by D3D12
+    block_width  = (avctx->width  + block_size - 1) / block_size;
+    block_height = (avctx->height + block_size - 1) / block_size;
+
+    // Zero-initialized: blocks outside every ROI get no QP delta
+    qp_map = av_calloc((size_t)block_width * block_height, elem_size);
+    if (!qp_map)
+        return AVERROR(ENOMEM);
+
+    // Iterate in reverse so the first region in the array wins on overlap
+    for (int i = nb_roi - 1; i >= 0; i--) {
+        int startx, endx, starty, endy, delta_qp;
+
+        roi = (const AVRegionOfInterest *)(data + roi_size * i);
+
+        if (roi->qoffset.den == 0) {
+            av_free(qp_map);
+            av_log(avctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
+            return AVERROR(EINVAL);
+        }
+
+        // Clamp to the frame before converting to block coordinates so that
+        // negative or oversized values can neither overflow the rounding
+        // nor index outside the map
+        starty = av_clip(roi->top,    0, avctx->height) / block_size;
+        endy   = (av_clip(roi->bottom, 0, avctx->height) + block_size - 1) / block_size;
+        startx = av_clip(roi->left,   0, avctx->width) / block_size;
+        endx   = (av_clip(roi->right,  0, avctx->width) + block_size - 1) / block_size;
+
+        // Scale qoffset to a delta QP in 64 bits (no int overflow), then
+        // limit it to the codec range, which fits the map element type
+        delta_qp = av_clip64((int64_t)roi->qoffset.num * qp_range / roi->qoffset.den,
+                             -qp_range, qp_range);
+
+        av_log(avctx, AV_LOG_DEBUG, "ROI: (%d,%d)-(%d,%d) -> %+d.\n",
+               roi->top, roi->left, roi->bottom, roi->right, delta_qp);
+
+        // Fill QP map for this ROI region
+        for (int y = starty; y < endy; y++) {
+            for (int x = startx; x < endx; x++) {
+                size_t idx = x + (size_t)y * block_width;
+
+                if (elem_size == sizeof(int16_t))
+                    ((int16_t *)qp_map)[idx] = delta_qp;
+                else
+                    ((int8_t *)qp_map)[idx] = delta_qp;
+            }
+        }
+    }
+
+    // qp_map_size counts map entries, not bytes; for AV1 the buffer behind
+    // the int8_t pointer holds 16-bit entries
+    pic->qp_map      = qp_map;
+    pic->qp_map_size = block_width * block_height;
+
+    return 0;
+}
+
 static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx,
                                                   D3D12VAEncodePicture *pic)
 {
@@ -366,6 +453,49 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
         }
     }
 
+    // Process ROI side data if present and supported
+
+    AVFrameSideData *sd = av_frame_get_side_data(base_pic->input_image,
+                                                    
AV_FRAME_DATA_REGIONS_OF_INTEREST);
+    if (sd && base_ctx->roi_allowed) {
+        err = d3d12va_encode_setup_roi(avctx, pic, sd->data, sd->size);
+        if (err < 0)
+            goto fail;
+
+        // Enable delta QP flag in rate control only if supported
+        input_args.SequenceControlDesc.RateControl.Flags |= 
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP;
+
+        // Set QP map in codec-specific picture control data
+        switch (ctx->codec->d3d12_codec) {
+            case D3D12_VIDEO_ENCODER_CODEC_H264:
+                if (pic->pic_ctl.pH264PicData) {
+                    pic->pic_ctl.pH264PicData->QPMapValuesCount  = 
pic->qp_map_size;
+                    pic->pic_ctl.pH264PicData->pRateControlQPMap = pic->qp_map;
+                }
+                break;
+            case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+                if (pic->pic_ctl.pHEVCPicData) {
+                    pic->pic_ctl.pHEVCPicData->QPMapValuesCount  = 
pic->qp_map_size;
+                    pic->pic_ctl.pHEVCPicData->pRateControlQPMap = pic->qp_map;
+                }
+                break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+            case D3D12_VIDEO_ENCODER_CODEC_AV1:
+                if (pic->pic_ctl.pAV1PicData) {
+                    pic->pic_ctl.pAV1PicData->QPMapValuesCount  = 
pic->qp_map_size;
+                    pic->pic_ctl.pAV1PicData->pRateControlQPMap = (INT16 
*)pic->qp_map;
+                }
+                break;
+#endif
+            default:
+                break;
+        }
+
+        av_log(avctx, AV_LOG_DEBUG, "ROI delta QP map created with %d blocks 
(region size: %d pixels).\n",
+                pic->qp_map_size, ctx->qp_map_region_size);
+    }
+
+
     input_args.PictureControlDesc.IntraRefreshFrameIndex  = 
ctx->intra_refresh_frame_index;
     if (base_pic->is_reference)
         input_args.PictureControlDesc.Flags |= 
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAG_USED_AS_REFERENCE_PICTURE;
@@ -669,6 +799,9 @@ static int d3d12va_encode_free(AVCodecContext *avctx, 
FFHWBaseEncodePicture *pic
     if (ctx->codec->free_picture_params)
         ctx->codec->free_picture_params(priv);
 
+    // Free ROI QP map if allocated
+    av_freep(&priv->qp_map);
+
     return 0;
 }
 
@@ -1318,6 +1451,103 @@ static int 
d3d12va_encode_init_gop_structure(AVCodecContext *avctx)
     return 0;
 }
 
+/**
+ * Probe whether delta QP maps are available for the current encoder
+ * configuration and record the result for ROI handling.
+ *
+ * Sets base_ctx->roi_allowed and ctx->qp_map_region_size. A failed or
+ * negative query only leaves ROI disabled; it is not reported as an error.
+ *
+ * @param avctx  codec context whose priv_data holds the encoder state
+ * @return 0 on every path that returns (an unknown codec hits av_assert0)
+ */
+static int d3d12va_encode_init_roi(AVCodecContext *avctx)
+{
+    FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
+    D3D12VAEncodeContext       *ctx = avctx->priv_data;
+    AVD3D12VAFramesContext *frames_hwctx = base_ctx->input_frames->hwctx;
+    HRESULT hr;
+
+    D3D12_VIDEO_ENCODER_PROFILE_DESC      profile = { 0 };
+    D3D12_VIDEO_ENCODER_PROFILE_H264 h264_profile = D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN;
+    D3D12_VIDEO_ENCODER_PROFILE_HEVC hevc_profile = D3D12_VIDEO_ENCODER_PROFILE_HEVC_MAIN;
+#if CONFIG_AV1_D3D12VA_ENCODER
+    D3D12_VIDEO_ENCODER_AV1_PROFILE   av1_profile = D3D12_VIDEO_ENCODER_AV1_PROFILE_MAIN;
+#endif
+
+    D3D12_VIDEO_ENCODER_LEVEL_SETTING                    level = { 0 };
+    D3D12_VIDEO_ENCODER_LEVELS_H264                 h264_level = { 0 };
+    D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC hevc_level = { 0 };
+#if CONFIG_AV1_D3D12VA_ENCODER
+    D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS   av1_level = { 0 };
+#endif
+
+    // Start with ROI disabled; it is only enabled after a successful query
+    ctx->qp_map_region_size = 0;
+    base_ctx->roi_allowed   = 0;
+
+    // Point the generic profile/level descriptors at the codec-specific storage
+    switch (ctx->codec->d3d12_codec) {
+        case D3D12_VIDEO_ENCODER_CODEC_H264:
+            profile.DataSize        = sizeof(D3D12_VIDEO_ENCODER_PROFILE_H264);
+            profile.pH264Profile    = &h264_profile;
+            level.DataSize          = sizeof(D3D12_VIDEO_ENCODER_LEVELS_H264);
+            level.pH264LevelSetting = &h264_level;
+            break;
+        case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+            profile.DataSize        = sizeof(D3D12_VIDEO_ENCODER_PROFILE_HEVC);
+            profile.pHEVCProfile    = &hevc_profile;
+            level.DataSize          = sizeof(D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC);
+            level.pHEVCLevelSetting = &hevc_level;
+            break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+        case D3D12_VIDEO_ENCODER_CODEC_AV1:
+            profile.DataSize        = sizeof(D3D12_VIDEO_ENCODER_AV1_PROFILE);
+            profile.pAV1Profile     = &av1_profile;
+            level.DataSize          = sizeof(D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS);
+            level.pAV1LevelSetting  = &av1_level;
+            break;
+#endif
+        default:
+            av_assert0(0);
+    }
+
+    // Query encoder support to check if delta QP works with current configuration.
+    // NOTE(review): the query presumably refreshes ctx->res_limits through
+    // pResolutionDependentSupport; confirm against the D3D12 documentation.
+    D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT1 support = {
+        .NodeIndex                   = 0,
+        .Codec                       = ctx->codec->d3d12_codec,
+        .InputFormat                 = frames_hwctx->format,
+        .RateControl                 = ctx->rc,
+        .IntraRefresh                = ctx->intra_refresh.Mode,
+        .SubregionFrameEncoding      = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME,
+        .ResolutionsListCount        = 1,
+        .pResolutionList             = &ctx->resolution,
+        .CodecGopSequence            = ctx->gop,
+        .MaxReferenceFramesInDPB     = MAX_DPB_SIZE - 1,
+        .CodecConfiguration          = ctx->codec_conf,
+        .SuggestedProfile            = profile,
+        .SuggestedLevel              = level,
+        .pResolutionDependentSupport = &ctx->res_limits,
+#if CONFIG_AV1_D3D12VA_ENCODER
+        .SubregionFrameEncodingData.pTilesPartition_AV1 = ctx->subregions_layout.pTilesPartition_AV1,
+#endif
+    };
+
+    hr = ID3D12VideoDevice3_CheckFeatureSupport(ctx->video_device3,
+                                                D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1,
+                                                &support, sizeof(support));
+    if (FAILED(hr)) {
+        // Best effort: encoding continues without ROI
+        av_log(avctx, AV_LOG_WARNING, "Failed to query encoder support for ROI, disabling ROI.\n");
+        return 0;
+    }
+
+    // Delta QP must be available and the reported region size must be non-zero,
+    // because the region size is later used as a divisor when the QP map is built
+    if ((support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_GENERAL_SUPPORT_OK) &&
+        (support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) &&
+        ctx->res_limits.QPMapRegionPixelsSize > 0) {
+        base_ctx->roi_allowed = 1;
+        // Store the QP map region size from resolution limits
+        ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+
+        av_log(avctx, AV_LOG_VERBOSE, "ROI encoding is supported via delta QP "
+               "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "ROI encoding not supported by hardware for current rate control mode "
+               "(SupportFlags: 0x%x, ValidationFlags: 0x%x).\n",
+               support.SupportFlags, support.ValidationFlags);
+    }
+
+    return 0;
+}
+
 static int d3d12va_encode_init_intra_refresh(AVCodecContext *avctx)
 {
     FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
@@ -1770,6 +2000,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
+    err = d3d12va_encode_init_roi(avctx);
+    if (err < 0)
+        goto fail;
+
     if (ctx->codec->init_sequence_params) {
         err = ctx->codec->init_sequence_params(avctx);
         if (err < 0) {
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index fcb97210b3..aec1abdc4f 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -57,6 +57,10 @@ typedef struct D3D12VAEncodePicture {
     D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
 
     int             fence_value;
+
+    // ROI delta QP map
+    int8_t         *qp_map;
+    int             qp_map_size;
 } D3D12VAEncodePicture;
 
 typedef struct D3D12VAEncodeProfile {
@@ -282,6 +286,12 @@ typedef struct D3D12VAEncodeContext {
      */
     D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE me_precision;
 
+
+    /**
+     * QP map region pixel size (block size for QP map)
+     */
+    int qp_map_region_size;
+
 } D3D12VAEncodeContext;
 
 typedef struct D3D12VAEncodeType {
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to