PR #21116 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21116
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21116.patch
The VK spec forbids using clear commands on YUV images, so we need to allocate separate per-plane images. This removes the need for a separate reset shader. >From 77be17e3a29124d6852339afdffe9795980f5812 Mon Sep 17 00:00:00 2001 From: averne <[email protected]> Date: Sat, 6 Dec 2025 19:45:18 +0100 Subject: [PATCH] vulkan/prores: use vkCmdClearColorImage The VK spec forbids using clear commands on YUV images, so we need to allocate separate per-plane images. This removes the need for a separate reset shader. --- libavcodec/vulkan/Makefile | 1 - libavcodec/vulkan/prores_reset.comp | 38 ----------- libavcodec/vulkan_decode.c | 28 ++++++-- libavcodec/vulkan_prores.c | 102 ++++++++++++---------------- 4 files changed, 65 insertions(+), 104 deletions(-) delete mode 100644 libavcodec/vulkan/prores_reset.comp diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 26e8e147c2..9d1349e0e3 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -19,7 +19,6 @@ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/prores_raw_idct.o OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \ - vulkan/prores_reset.o \ vulkan/prores_vld.o \ vulkan/prores_idct.o diff --git a/libavcodec/vulkan/prores_reset.comp b/libavcodec/vulkan/prores_reset.comp deleted file mode 100644 index 51cbc6b3d9..0000000000 --- a/libavcodec/vulkan/prores_reset.comp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -void main(void) -{ - uvec3 gid = gl_GlobalInvocationID; -#ifndef INTERLACED - ivec2 pos = ivec2(gid); -#else - ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field); -#endif - - /* Clear luma plane */ - imageStore(dst[0], pos, uvec4(0)); - - /* Clear chroma plane */ - if (gid.x < mb_width << (4 - log2_chroma_w)) { - imageStore(dst[1], pos, uvec4(0)); - imageStore(dst[2], pos, uvec4(0)); - } - - /* Alpha plane doesn't need a clear because it is not sparsely encoded */ -} diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index d6f6ec8c3b..364ceb7bf3 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -1089,7 +1089,7 @@ static void free_profile_data(AVHWFramesContext *hwfc) int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) { VkFormat vkfmt = VK_FORMAT_UNDEFINED; - int err, dedicated_dpb; + int err, dedicated_dpb, num_imgs = 1; AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; AVVulkanFramesContext *hwfc = frames_ctx->hwctx; FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; @@ -1141,6 +1141,20 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) /* mpv has issues with bgr0 mapping, so just remap it */ frames_ctx->sw_format = AV_PIX_FMT_RGB0; break; + case AV_PIX_FMT_YUV422P10: /* ProRes needs to clear the input image, which is not possible on YUV formats */ + case AV_PIX_FMT_YUV444P10: + case AV_PIX_FMT_YUV422P12: + case AV_PIX_FMT_YUV444P12: + vkfmt = VK_FORMAT_R16_UNORM; + num_imgs = 3; + break; + case AV_PIX_FMT_YUVA422P10: /* Ditto */ + case AV_PIX_FMT_YUVA444P10: + case AV_PIX_FMT_YUVA422P12: + case AV_PIX_FMT_YUVA444P12: + vkfmt = VK_FORMAT_R16_UNORM; + num_imgs = 4; + break; default: break; } @@ -1151,11 
+1165,13 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) frames_ctx->height = FFALIGN(avctx->coded_height, 1 << pdesc->log2_chroma_h); frames_ctx->format = AV_PIX_FMT_VULKAN; - hwfc->format[0] = vkfmt; - hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; - hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; + for (int i = 0; i < num_imgs; ++i) + hwfc->format[i] = vkfmt; + + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; if (prof) { FFVulkanDecodeShared *ctx; diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c index 90b8610817..4a0f86eb9a 100644 --- a/libavcodec/vulkan_prores.c +++ b/libavcodec/vulkan_prores.c @@ -24,7 +24,6 @@ #include "libavutil/vulkan_spirv.h" extern const char *ff_source_common_comp; -extern const char *ff_source_prores_reset_comp; extern const char *ff_source_prores_vld_comp; extern const char *ff_source_prores_idct_comp; @@ -46,7 +45,6 @@ typedef struct ProresVulkanDecodePicture { } ProresVulkanDecodePicture; typedef struct ProresVulkanDecodeContext { - FFVulkanShader reset; FFVulkanShader vld; FFVulkanShader idct; @@ -157,12 +155,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx) ProresVulkanDecodeContext *pv = ctx->sd_ctx; ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private; FFVulkanDecodePicture *vp = &pp->vp; + AVFrame *f = pr->frame; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; ProresVkParameters pd; FFVkBuffer *slice_data, *metadata; VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; VkBufferMemoryBarrier2 buf_bar[2]; - int nb_img_bar = 0, nb_buf_bar = 0, err; + int nb_img_bar = 0, nb_buf_bar = 0, nb_imgs, i, err; const AVPixFmtDescriptor *pix_desc; if (!pp->slice_num) @@ -199,12 +199,11 @@ static int vk_prores_end_frame(AVCodecContext *avctx) RET(ff_vk_exec_start(&ctx->s, exec)); /* Prepare deps */ - 
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame, + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, - pr->frame)); + RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, f)); /* Transfer ownership to the exec context */ RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); @@ -212,11 +211,44 @@ static int vk_prores_end_frame(AVCodecContext *avctx) RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->metadata_buf, 1, 0)); pp->metadata_buf = NULL; + vkf->layout[0] = VK_IMAGE_LAYOUT_UNDEFINED; + vkf->access[0] = VK_ACCESS_2_NONE; + + nb_imgs = ff_vk_count_images(vkf); + /* Input barrier */ - ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar, + ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_CLEAR_BIT, + VK_ACCESS_2_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + nb_img_bar = nb_buf_bar = 0; + + /* Clear the input image since the vld shader does sparse writes, except for alpha */ + for (i = 0; i < FFMAX(nb_imgs, 3); ++i) { + vk->CmdClearColorImage(exec->buf, vkf->img[i], + VK_IMAGE_LAYOUT_GENERAL, + &((VkClearColorValue) { 0 }), + 1, &((VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + })); + } + + ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_CLEAR_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL, 
VK_QUEUE_FAMILY_IGNORED); @@ -244,37 +276,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx) }); nb_img_bar = nb_buf_bar = 0; - /* Reset */ - ff_vk_shader_update_img_array(&ctx->s, exec, &pv->reset, - pr->frame, vp->view.out, - 0, 0, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - ff_vk_exec_bind_shader(&ctx->s, exec, &pv->reset); - ff_vk_shader_update_push_const(&ctx->s, exec, &pv->reset, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1); - - /* Input frame barrier after reset */ - ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - nb_img_bar = nb_buf_bar = 0; - /* Entropy decode */ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &pv->vld, 0, 0, 0, @@ -287,7 +288,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx) pp->mb_params_sz, VK_FORMAT_UNDEFINED); ff_vk_shader_update_img_array(&ctx->s, exec, &pv->vld, - pr->frame, vp->view.out, + f, vp->view.out, 0, 2, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -301,7 +302,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx) 3 + !!pr->alpha_info); /* Synchronize vld and idct shaders */ - ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar, + ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, @@ -339,7 +340,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx) pp->mb_params_sz, VK_FORMAT_UNDEFINED); 
ff_vk_shader_update_img_array(&ctx->s, exec, &pv->idct, - pr->frame, vp->view.out, + f, vp->view.out, 0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -434,7 +435,6 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx) { ProresVulkanDecodeContext *pv = ctx->sd_ctx; - ff_vk_shader_free(&ctx->s, &pv->reset); ff_vk_shader_free(&ctx->s, &pv->vld); ff_vk_shader_free(&ctx->s, &pv->idct); @@ -478,22 +478,6 @@ static int vk_decode_prores_init(AVCodecContext *avctx) ctx->sd_ctx_free = vk_decode_prores_uninit; - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = "writeonly", - .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &pv->reset, - "prores_dec_reset", "main", desc_set, 1, - ff_source_prores_reset_comp, 0x080801, pr->frame_type != 0)); - desc_set = (FFVulkanDescriptorSetBinding []) { { .name = "slice_offsets_buf", -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
