PR #23580 opened by Steven Xiao (younengxiao) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23580 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23580.patch
Add a "tiles" option (WxH format) to the D3D12VA AV1 encoder. When unset, the encoder falls back to the minimum number of tile columns/rows that satisfies AV1 spec constraints. Multi-tile support is also a prerequisite for 8K encoding. Example usage: - default (minimum tiles, auto-computed): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -c:v av1_d3d12va output.mp4 - 2x2 tile (4 tiles): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -c:v av1_d3d12va -tiles 2x2 output.mp4 - 4x4 tile - required for 8K (7680x4320): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input_8k.mp4 -c:v av1_d3d12va -tiles 4x4 output_8k.mp4 From 8b48951308229ab74cff446737ec4ac4c3266d23 Mon Sep 17 00:00:00 2001 From: younengxiao <[email protected]> Date: Tue, 23 Jun 2026 22:18:39 -0400 Subject: [PATCH] avcodec/d3d12va_encode_av1: add user-configurable tile support Add a "tiles" option (WxH format) to the D3D12VA AV1 encoder. When unset, the encoder falls back to the minimum number of tile columns/rows that satisfies AV1 spec constraints. Multi-tile support is also a prerequisite for 8K encoding. 
Example usage: # default (minimum tiles, auto-computed): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -c:v av1_d3d12va output.mp4 # 2x2 tile (4 tiles): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -c:v av1_d3d12va -tiles 2x2 output.mp4 # 4x4 tile - required for 8K (7680x4320): ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input_8k.mp4 -c:v av1_d3d12va -tiles 4x4 output_8k.mp4 Signed-off-by: younengxiao <[email protected]> --- libavcodec/d3d12va_encode.c | 18 +- libavcodec/d3d12va_encode.h | 4 + libavcodec/d3d12va_encode_av1.c | 436 ++++++++++++++++++++++++++------ 3 files changed, 369 insertions(+), 89 deletions(-) diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c index 2cca179b83..d7125db929 100644 --- a/libavcodec/d3d12va_encode.c +++ b/libavcodec/d3d12va_encode.c @@ -266,7 +266,9 @@ static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx, D3D12VAEncodePicture *pic) { D3D12VAEncodeContext *ctx = avctx->priv_data; - int width = sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA); + int num_subregions = ctx->num_subregions > 0 ? 
ctx->num_subregions : 1; + int width = sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + + sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA) * num_subregions; #if CONFIG_AV1_D3D12VA_ENCODER if (ctx->codec->d3d12_codec == D3D12_VIDEO_ENCODER_CODEC_AV1) { width += sizeof(D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES) @@ -346,7 +348,7 @@ static int d3d12va_encode_issue(AVCodecContext *avctx, .IntraRefreshConfig = ctx->intra_refresh, .RateControl = ctx->rc, .PictureTargetResolution = ctx->resolution, - .SelectedLayoutMode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME, + .SelectedLayoutMode = ctx->subregion_mode, .FrameSubregionsLayoutData = ctx->subregions_layout, .CodecGopSequence = ctx->gop, }, @@ -1777,12 +1779,6 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) if (err < 0) goto fail; - if (ctx->codec->set_tile) { - err = ctx->codec->set_tile(avctx); - if (err < 0) - goto fail; - } - err = d3d12va_encode_init_rate_control(avctx); if (err < 0) goto fail; @@ -1793,6 +1789,12 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx) goto fail; } + if (ctx->codec->set_tile) { + err = ctx->codec->set_tile(avctx); + if (err < 0) + goto fail; + } + err = d3d12va_encode_init_gop_structure(avctx); if (err < 0) goto fail; diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h index f059ae9806..f2e97da7d2 100644 --- a/libavcodec/d3d12va_encode.h +++ b/libavcodec/d3d12va_encode.h @@ -271,6 +271,8 @@ typedef struct D3D12VAEncodeContext { D3D12_VIDEO_ENCODER_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA subregions_layout; + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE subregion_mode; + /** * Intra refresh configuration */ @@ -290,6 +292,8 @@ typedef struct D3D12VAEncodeContext { * QP map region pixel size (block size for QP map) */ int qp_map_region_size; + + int num_subregions; } D3D12VAEncodeContext; typedef struct D3D12VAEncodeType { diff --git a/libavcodec/d3d12va_encode_av1.c b/libavcodec/d3d12va_encode_av1.c index 
8d2edec491..14ffcd9491 100644 --- a/libavcodec/d3d12va_encode_av1.c +++ b/libavcodec/d3d12va_encode_av1.c @@ -103,6 +103,16 @@ typedef struct D3D12VAEncodeAV1Context { CodedBitstreamFragment current_obu; D3D12_VIDEO_ENCODER_AV1_POST_ENCODE_VALUES_FLAGS post_encode_values_flag; AVFifo *picture_header_list; + + int tile_cols; + int tile_rows; + int tile_cols_log2; + int tile_rows_log2; + int uniform_tile_spacing; + int tile_size_bytes; + uint16_t context_update_tile_id; + uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS]; + uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS]; } D3D12VAEncodeAV1Context; typedef struct D3D12VAEncodeAV1Level { @@ -110,7 +120,6 @@ typedef struct D3D12VAEncodeAV1Level { D3D12_VIDEO_ENCODER_AV1_LEVELS d3d12_level; } D3D12VAEncodeAV1Level; - static const D3D12VAEncodeAV1Level av1_levels[] = { { 0, D3D12_VIDEO_ENCODER_AV1_LEVELS_2_0 }, { 1, D3D12_VIDEO_ENCODER_AV1_LEVELS_2_1 }, @@ -138,6 +147,12 @@ static const D3D12VAEncodeAV1Level av1_levels[] = { { 23, D3D12_VIDEO_ENCODER_AV1_LEVELS_7_3 }, }; + typedef struct D3D12VAEncodeAV1TileInfo { + uint64_t full; + uint64_t prefix; + uint64_t data; +} D3D12VAEncodeAV1TileInfo; + static const D3D12_VIDEO_ENCODER_AV1_PROFILE profile_main = D3D12_VIDEO_ENCODER_AV1_PROFILE_MAIN; static const D3D12_VIDEO_ENCODER_AV1_PROFILE profile_high = D3D12_VIDEO_ENCODER_AV1_PROFILE_HIGH; static const D3D12_VIDEO_ENCODER_AV1_PROFILE profile_professional = D3D12_VIDEO_ENCODER_AV1_PROFILE_PROFESSIONAL; @@ -225,10 +240,22 @@ static int d3d12va_encode_av1_update_current_frame_picture_header(AVCodecContext err = AVERROR_UNKNOWN; return err; } - post_encode_values = (D3D12_VIDEO_ENCODER_AV1_POST_ENCODE_VALUES*) (data + - sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + - sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA) + - sizeof(D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES)); + D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES *tile_partition; + uint64_t nb_subregions = 
((D3D12_VIDEO_ENCODER_OUTPUT_METADATA *)data)->WrittenSubregionsCount; + if (!nb_subregions) + nb_subregions = 1; + + tile_partition = (D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES *) (data + + sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + + sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA) * nb_subregions); + + post_encode_values = (D3D12_VIDEO_ENCODER_AV1_POST_ENCODE_VALUES *) (data + + sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA) + + sizeof(D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA) * nb_subregions + + sizeof(D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES)); + + if (nb_subregions > 1) + fh->context_update_tile_id = tile_partition->ContextUpdateTileId; if (priv->post_encode_values_flag & D3D12_VIDEO_ENCODER_AV1_POST_ENCODE_VALUES_FLAG_QUANTIZATION) { fh->base_q_idx = post_encode_values->Quantization.BaseQIndex; @@ -327,6 +354,10 @@ static int d3d12va_encode_av1_write_tile_group(AVCodecContext *avctx, AV1RawTileGroup *tg = &tile_group_obu->obu.tile_group; int err = 0; + tg->tile_start_and_end_present_flag = 0; + tg->tg_start = 0; + tg->tg_end = priv->tile_cols * priv->tile_rows - 1; + tg->tile_data.data = tile_group; tg->tile_data.data_ref = NULL; tg->tile_data.data_size = tile_group_size; @@ -344,100 +375,174 @@ fail: } static int d3d12va_encode_av1_get_buffer_size(AVCodecContext *avctx, - D3D12VAEncodePicture *pic, size_t *size) + D3D12VAEncodePicture *pic, + uint64_t tile_size_bytes, + uint64_t *nb_subregions_out, + D3D12VAEncodeAV1TileInfo **tiles_out, + size_t *tile_payload_size_out) { - D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA *subregion_meta = NULL; - uint8_t *data = NULL; - HRESULT hr = S_OK; - int err = 0; + uint8_t *meta_data = NULL; + HRESULT hr = S_OK; + int err = 0; - hr = ID3D12Resource_Map(pic->resolved_metadata, 0, NULL, (void **)&data); - if (FAILED(hr)) { - err = AVERROR_UNKNOWN; - return err; + D3D12_VIDEO_ENCODER_OUTPUT_METADATA *out; + D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA 
*subregions; + D3D12VAEncodeAV1TileInfo *tiles; + uint64_t nb_subregions; + size_t tile_payload_size = 0; + + hr = ID3D12Resource_Map(pic->resolved_metadata, 0, NULL, (void **)&meta_data); + if (FAILED(hr)) + return AVERROR_UNKNOWN; + + out = (D3D12_VIDEO_ENCODER_OUTPUT_METADATA *)meta_data; + subregions = (D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA *) (meta_data + + sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA)); + + if (out->EncodeErrorFlags != D3D12_VIDEO_ENCODER_ENCODE_ERROR_FLAG_NO_ERROR) { + av_log(avctx, AV_LOG_ERROR, "AV1 encode failed: error flags %#"PRIx64".\n", + (uint64_t)out->EncodeErrorFlags); + err = AVERROR_EXTERNAL; + goto unmap; } - subregion_meta = (D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA*)(data + sizeof(D3D12_VIDEO_ENCODER_OUTPUT_METADATA)); - if (subregion_meta->bSize == 0) { + nb_subregions = out->WrittenSubregionsCount; + if (nb_subregions == 0 || subregions[0].bSize == 0) { av_log(avctx, AV_LOG_ERROR, "No subregion metadata found\n"); err = AVERROR(EINVAL); - return err; + goto unmap; } - *size = subregion_meta->bSize; + tiles = av_malloc_array(nb_subregions, sizeof(*tiles)); + if (!tiles) { + err = AVERROR(ENOMEM); + goto unmap; + } + + for (uint64_t i = 0; i < nb_subregions; i++) { + tiles[i].full = subregions[i].bSize; + tiles[i].prefix = subregions[i].bStartOffset; + tiles[i].data = subregions[i].bSize - subregions[i].bStartOffset; + tile_payload_size += tiles[i].data; + } + /* Every tile except the last is prefixed with a tile_size_minus_1 field. 
*/ + if (nb_subregions > 1) + tile_payload_size += (nb_subregions - 1) * tile_size_bytes; + + *nb_subregions_out = nb_subregions; + *tiles_out = tiles; + *tile_payload_size_out = tile_payload_size; + +unmap: ID3D12Resource_Unmap(pic->resolved_metadata, 0, NULL); - - return 0; + return err; } static int d3d12va_encode_av1_get_coded_data(AVCodecContext *avctx, D3D12VAEncodePicture *pic, AVPacket *pkt) { - int err = 0; - uint8_t *ptr = NULL; - uint8_t *mapped_data = NULL; - size_t total_size = 0; - HRESULT hr = S_OK; - size_t av1_pic_hd_size = 0; - int tile_group_extra_size = 0; - size_t bit_len = 0; + D3D12VAEncodeAV1Context *priv = avctx->priv_data; + int err = 0; + uint8_t *ptr = NULL; + uint8_t *mapped_data = NULL; + uint8_t *tile_group_buf = NULL; + char *obu_buf = NULL; + HRESULT hr = S_OK; + size_t av1_pic_hd_size = 0; + size_t bit_len = 0; + size_t obu_size = 0; + size_t tile_payload_size = 0; + size_t total_size = 0; + uint64_t nb_subregions = 0; + int output_buffer_mapped = 0; + D3D12VAEncodeAV1TileInfo *tiles = NULL; char pic_hd_data[MAX_PARAM_BUFFER_SIZE] = { 0 }; - err = d3d12va_encode_av1_get_buffer_size(avctx, pic, &total_size); + err = d3d12va_encode_av1_get_buffer_size(avctx, pic, priv->tile_size_bytes, + &nb_subregions, &tiles, + &tile_payload_size); if (err < 0) goto end; - // Update the picture header and calculate the picture header size - memset(pic_hd_data, 0, sizeof(pic_hd_data)); - err = d3d12va_encode_av1_write_picture_header(avctx, pic, pic_hd_data, &av1_pic_hd_size); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write picture header: %d.\n", err); - return err; - } - av1_pic_hd_size /= 8; - av_log(avctx, AV_LOG_DEBUG, "AV1 picture header size: %zu bytes.\n", av1_pic_hd_size); - - - tile_group_extra_size = (av_log2(total_size) + 7) / 7 + 1; // 1 byte for obu header, rest for tile group LEB128 size - av_log(avctx, AV_LOG_DEBUG, "Tile group extra size: %d bytes.\n", tile_group_extra_size); - - total_size += (pic->header_size + 
tile_group_extra_size + av1_pic_hd_size); - av_log(avctx, AV_LOG_DEBUG, "Output buffer size %zu\n", total_size); - hr = ID3D12Resource_Map(pic->output_buffer, 0, NULL, (void **)&mapped_data); if (FAILED(hr)) { err = AVERROR_UNKNOWN; goto end; } + output_buffer_mapped = 1; + tile_group_buf = av_malloc(tile_payload_size); + if (!tile_group_buf) { + err = AVERROR(ENOMEM); + goto end; + } + + uint8_t *dst = tile_group_buf; + uint8_t *tiles_base = mapped_data + pic->aligned_header_size; + uint64_t src_off = 0; + for (uint64_t i = 0; i < nb_subregions; i++) { + /* Tiles are laid out contiguously by their full size; the actual + * tile data starts bStartOffset bytes into each subregion. */ + uint64_t src_pos = src_off + tiles[i].prefix; + src_off += tiles[i].full; + + if (i != nb_subregions - 1) { + /* Write tile_size_minus_1 as a little-endian integer */ + uint64_t v = tiles[i].data - 1; + for (uint64_t b = 0; b < priv->tile_size_bytes; b++) + *dst++ = (v >> (8 * b)) & 0xff; + } + memcpy(dst, tiles_base + src_pos, tiles[i].data); + dst += tiles[i].data; + } + + err = d3d12va_encode_av1_write_picture_header(avctx, pic, pic_hd_data, &bit_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write picture header: %d.\n", err); + goto end; + } + av1_pic_hd_size = bit_len / 8; + + + + obu_buf = av_malloc(tile_payload_size + MAX_PARAM_BUFFER_SIZE); + if (!obu_buf) { + err = AVERROR(ENOMEM); + goto end; + } + err = d3d12va_encode_av1_write_tile_group(avctx, tile_group_buf, + tile_payload_size, obu_buf, &bit_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write tile group: %d.\n", err); + goto end; + } + obu_size = bit_len / 8; + + total_size = pic->header_size + av1_pic_hd_size + obu_size; err = ff_get_encode_buffer(avctx, pkt, total_size, 0); if (err < 0) goto end; ptr = pkt->data; memcpy(ptr, mapped_data, pic->header_size); - ptr += pic->header_size; - mapped_data += pic->aligned_header_size; - total_size -= pic->header_size; memcpy(ptr, pic_hd_data, 
av1_pic_hd_size); ptr += av1_pic_hd_size; - total_size -= av1_pic_hd_size; - av_log(avctx, AV_LOG_DEBUG, "AV1 total_size after write picture header: %zu.\n", total_size); - total_size -= tile_group_extra_size; - err = d3d12va_encode_av1_write_tile_group(avctx, mapped_data, total_size, ptr, &bit_len); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write tile group: %d.\n", err); - goto end; - } - assert((total_size + tile_group_extra_size) * 8 == bit_len); + memcpy(ptr, obu_buf, obu_size); - ID3D12Resource_Unmap(pic->output_buffer, 0, NULL); + av_log(avctx, AV_LOG_DEBUG, "AV1 packet: %"PRIu64" tiles, header %d, " + "pic header %zu, tile group %zu, total %zu bytes.\n", + nb_subregions, pic->header_size, av1_pic_hd_size, obu_size, total_size); end: + if (output_buffer_mapped) + ID3D12Resource_Unmap(pic->output_buffer, 0, NULL); + av_freep(&tiles); + av_freep(&tile_group_buf); + av_freep(&obu_buf); av_buffer_unref(&pic->output_buffer_ref); pic->output_buffer = NULL; return err; @@ -495,11 +600,13 @@ static int d3d12va_hw_base_encode_init_params_av1(FFHWBaseEncodeContext *base_ct else framerate = 0; - //currently only supporting 1 tile + D3D12VAEncodeAV1Context *priv = avctx->priv_data; + int tile_cols = priv->tile_cols > 0 ? priv->tile_cols : 1; + int tile_rows = priv->tile_rows > 0 ? 
priv->tile_rows : 1; + level = ff_av1_guess_level(avctx->bit_rate, opts->tier, base_ctx->surface_width, base_ctx->surface_height, - /*priv->tile_rows*/1 * 1/*priv->tile_cols*/, - /*priv->tile_cols*/1, framerate); + tile_rows * tile_cols, tile_cols, framerate); if (level) { av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); seq->seq_level_idx[0] = level->level_idx; @@ -517,6 +624,7 @@ static int d3d12va_hw_base_encode_init_params_av1(FFHWBaseEncodeContext *base_ct seq->reduced_still_picture_header = seq->still_picture; // Feature flags + seq->use_128x128_superblock = opts->enable_128x128_superblock; seq->enable_filter_intra = opts->enable_filter_intra; seq->enable_intra_edge_filter = opts->enable_intra_edge_filter; seq->enable_interintra_compound = opts->enable_interintra_compound; @@ -557,7 +665,7 @@ static int d3d12va_encode_av1_init_sequence_params(AVCodecContext *avctx) .InputFormat = hwctx->format, .RateControl = ctx->rc, .IntraRefresh = ctx->intra_refresh.Mode, - .SubregionFrameEncoding = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME, + .SubregionFrameEncoding = ctx->subregion_mode, .ResolutionsListCount = 1, .pResolutionList = &ctx->resolution, .CodecGopSequence = ctx->gop, @@ -637,6 +745,173 @@ static int d3d12va_encode_av1_init_sequence_params(AVCodecContext *avctx) return 0; } +static av_always_inline int d3d12va_encode_av1_tile_log2(int blksize, int target) +{ + int k; + for (k = 0; (blksize << k) < target; k++); + return k; +} + +static int d3d12va_encode_av1_set_tile(AVCodecContext *avctx) +{ + D3D12VAEncodeContext *ctx = avctx->priv_data; + D3D12VAEncodeAV1Context *priv = avctx->priv_data; + D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES *tiles_layout; + + ctx->subregions_layout.DataSize = + sizeof(D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES); + tiles_layout = av_mallocz(ctx->subregions_layout.DataSize); + if (!tiles_layout) + return AVERROR(ENOMEM); + 
ctx->subregions_layout.pTilesPartition_AV1 = tiles_layout; + + int use_128 = priv->unit_opts.enable_128x128_superblock; + int width = ctx->resolution.Width; + int height = ctx->resolution.Height; + + int mi_cols = 2 * ((width + 7) >> 3); + int mi_rows = 2 * ((height + 7) >> 3); + int sb_cols = use_128 ? ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4); + int sb_rows = use_128 ? ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4); + int sb_shift = use_128 ? 5 : 4; + int sb_size = sb_shift + 2; + + int max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size; + int max_tile_area_sb = AV1_MAX_TILE_AREA >> (2 * sb_size); + + int min_log2_tile_cols = d3d12va_encode_av1_tile_log2(max_tile_width_sb, sb_cols); + int min_log2_tiles = FFMAX(min_log2_tile_cols, + d3d12va_encode_av1_tile_log2(max_tile_area_sb, sb_rows * sb_cols)); + int max_tile_area_sb_varied; + int tile_width_sb, tile_height_sb, widest_tile_sb; + int tile_cols, tile_rows; + int min_tile_cols = (sb_cols + max_tile_width_sb - 1) / max_tile_width_sb; + int i; + + if (priv->tile_cols > AV1_MAX_TILE_COLS || + priv->tile_rows > AV1_MAX_TILE_ROWS) { + av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should be at " + "most %dx%d.\n", priv->tile_cols, priv->tile_rows, + AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS); + return AVERROR(EINVAL); + } + + /* Calculate tile columns. 
When the user did not set tile explicitly + * (tile_cols == 0) this falls back to the minimum valid number */ + tile_cols = av_clip(priv->tile_cols, min_tile_cols, sb_cols); + if (!priv->tile_cols) + priv->tile_cols = tile_cols; + else if (priv->tile_cols != tile_cols) { + av_log(avctx, AV_LOG_ERROR, "Invalid tile cols %d, should be in range " + "of %d~%d.\n", priv->tile_cols, min_tile_cols, sb_cols); + return AVERROR(EINVAL); + } + + priv->tile_cols_log2 = d3d12va_encode_av1_tile_log2(1, priv->tile_cols); + tile_width_sb = (sb_cols + (1 << priv->tile_cols_log2) - 1) >> priv->tile_cols_log2; + + if (priv->tile_rows > sb_rows) { + av_log(avctx, AV_LOG_ERROR, "Invalid tile rows %d, should be less than " + "%d.\n", priv->tile_rows, sb_rows); + return AVERROR(EINVAL); + } + + tile_rows = priv->tile_rows ? priv->tile_rows : 1; + for (; tile_rows <= sb_rows && tile_rows <= AV1_MAX_TILE_ROWS; tile_rows++) { + + /* Try uniform tile spacing first. */ + priv->tile_rows_log2 = d3d12va_encode_av1_tile_log2(1, tile_rows); + + if ((sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) { + for (i = 0; i < priv->tile_cols - 1; i++) + priv->width_in_sbs_minus_1[i] = tile_width_sb - 1; + priv->width_in_sbs_minus_1[i] = sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1; + + tile_height_sb = (sb_rows + (1 << priv->tile_rows_log2) - 1) >> + priv->tile_rows_log2; + + if ((sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows && + tile_height_sb <= max_tile_area_sb / tile_width_sb) { + for (i = 0; i < tile_rows - 1; i++) + priv->height_in_sbs_minus_1[i] = tile_height_sb - 1; + priv->height_in_sbs_minus_1[i] = sb_rows - (tile_rows - 1) * tile_height_sb - 1; + + priv->uniform_tile_spacing = 1; + break; + } + } + + /* Try non-uniform fallback: distribute columns/rows as evenly as possible */ + widest_tile_sb = 0; + for (i = 0; i < priv->tile_cols; i++) { + priv->width_in_sbs_minus_1[i] = + (i + 1) * sb_cols / priv->tile_cols - i * sb_cols / priv->tile_cols - 1; + widest_tile_sb = 
FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1); + } + + if (min_log2_tiles) + max_tile_area_sb_varied = (sb_rows * sb_cols) >> (min_log2_tiles + 1); + else + max_tile_area_sb_varied = sb_rows * sb_cols; + tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb); + + if (tile_rows == av_clip(tile_rows, + (sb_rows + tile_height_sb - 1) / tile_height_sb, + sb_rows)) { + for (i = 0; i < tile_rows; i++) + priv->height_in_sbs_minus_1[i] = + (i + 1) * sb_rows / tile_rows - i * sb_rows / tile_rows - 1; + + priv->uniform_tile_spacing = 0; + break; + } + + if (priv->tile_rows) { + av_log(avctx, AV_LOG_ERROR, "Invalid tile rows %d.\n", priv->tile_rows); + return AVERROR(EINVAL); + } + } + + priv->tile_rows = tile_rows; + av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n", + priv->tile_cols, priv->tile_rows); + + if (priv->tile_cols > AV1_MAX_TILE_COLS || priv->tile_rows > AV1_MAX_TILE_ROWS) { + av_log(avctx, AV_LOG_ERROR, "Resolution %dx%d requires %dx%d tiles which " + "exceeds the AV1 maximum of %dx%d.\n", width, height, + priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS); + return AVERROR(EINVAL); + } + + priv->context_update_tile_id = priv->tile_cols * priv->tile_rows - 1; + priv->tile_size_bytes = 4; + + tiles_layout->RowCount = priv->tile_rows; + tiles_layout->ColCount = priv->tile_cols; + for (i = 0; i < priv->tile_cols; i++) + tiles_layout->ColWidths[i] = priv->width_in_sbs_minus_1[i] + 1; + for (i = 0; i < priv->tile_rows; i++) + tiles_layout->RowHeights[i] = priv->height_in_sbs_minus_1[i] + 1; + tiles_layout->ContextUpdateTileId = priv->context_update_tile_id; + + ctx->num_subregions = priv->tile_cols * priv->tile_rows; + + if (ctx->num_subregions <= 1) + ctx->subregion_mode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME; + else if (priv->uniform_tile_spacing) + ctx->subregion_mode = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_UNIFORM_GRID_PARTITION; + else + ctx->subregion_mode = 
D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_CONFIGURABLE_GRID_PARTITION; + + av_log(avctx, AV_LOG_VERBOSE, "AV1 tile partition: %d cols x %d rows " + "(%s superblocks, %s).\n", priv->tile_cols, priv->tile_rows, + use_128 ? "128x128" : "64x64", + ctx->num_subregions <= 1 ? "full frame" : + priv->uniform_tile_spacing ? "uniform grid" : "configurable grid"); + + return 0; +} + static int d3d12va_encode_av1_get_encoder_caps(AVCodecContext *avctx) { HRESULT hr = S_OK; @@ -848,21 +1123,6 @@ static int d3d12va_encode_av1_set_level(AVCodecContext *avctx) return 0; } -static int d3d12va_encode_av1_set_tile(AVCodecContext *avctx) -{ - D3D12VAEncodeContext *ctx = avctx->priv_data; - - ctx->subregions_layout.DataSize = sizeof(D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES); - D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES *tiles_layout = av_mallocz(ctx->subregions_layout.DataSize); - ctx->subregions_layout.pTilesPartition_AV1 = tiles_layout; - - // Currently only support 1 tile - tiles_layout->RowCount = 1; - tiles_layout->ColCount = 1; - - return 0; -} - static void d3d12va_encode_av1_free_picture_params(D3D12VAEncodePicture *pic) { if (!pic->pic_ctl.pAV1PicData) @@ -972,9 +1232,18 @@ static int d3d12va_encode_av1_init_picture_params(AVCodecContext *avctx, fh->render_height_minus_1 = fh->frame_height_minus_1; fh->is_filter_switchable = 1; fh->interpolation_filter = AV1_INTERPOLATION_FILTER_SWITCHABLE; - fh->uniform_tile_spacing_flag = 1; - fh->width_in_sbs_minus_1[0] = (ctx->resolution.Width + 63 >> 6) -1; // 64x64 superblock size - fh->height_in_sbs_minus_1[0] = (ctx->resolution.Height + 63 >> 6) -1; // 64x64 superblock size + + fh->uniform_tile_spacing_flag = priv->uniform_tile_spacing; + fh->tile_cols = priv->tile_cols; + fh->tile_rows = priv->tile_rows; + fh->tile_cols_log2 = priv->tile_cols_log2; + fh->tile_rows_log2 = priv->tile_rows_log2; + fh->context_update_tile_id = priv->context_update_tile_id; + 
fh->tile_size_bytes_minus1 = priv->tile_size_bytes - 1; + for (i = 0; i < priv->tile_cols; i++) + fh->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i]; + for (i = 0; i < priv->tile_rows; i++) + fh->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i]; memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas, AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t)); @@ -1163,6 +1432,11 @@ static const AVOption d3d12va_encode_av1_options[] = { { LEVEL("7.2", 22) }, { LEVEL("7.3", 23) }, #undef LEVEL + + { "tiles", "Tile columns x rows (Use minimal tile column/row number " + "automatically by default)", + OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, FLAGS }, + { NULL }, }; -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
