PR #21116 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21116
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21116.patch

The VK spec forbids using clear commands on YUV images,
so we need to allocate separate per-plane images.
This removes the need for a separate reset shader.


>From 77be17e3a29124d6852339afdffe9795980f5812 Mon Sep 17 00:00:00 2001
From: averne <[email protected]>
Date: Sat, 6 Dec 2025 19:45:18 +0100
Subject: [PATCH] vulkan/prores: use vkCmdClearColorImage

The VK spec forbids using clear commands on YUV images,
so we need to allocate separate per-plane images.
This removes the need for a separate reset shader.
---
 libavcodec/vulkan/Makefile          |   1 -
 libavcodec/vulkan/prores_reset.comp |  38 -----------
 libavcodec/vulkan_decode.c          |  28 ++++++--
 libavcodec/vulkan_prores.c          | 102 ++++++++++++----------------
 4 files changed, 65 insertions(+), 104 deletions(-)
 delete mode 100644 libavcodec/vulkan/prores_reset.comp

diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 26e8e147c2..9d1349e0e3 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -19,7 +19,6 @@ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
                                             vulkan/prores_raw_idct.o
 
 OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
-                                        vulkan/prores_reset.o \
                                         vulkan/prores_vld.o \
                                         vulkan/prores_idct.o
 
diff --git a/libavcodec/vulkan/prores_reset.comp b/libavcodec/vulkan/prores_reset.comp
deleted file mode 100644
index 51cbc6b3d9..0000000000
--- a/libavcodec/vulkan/prores_reset.comp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-void main(void)
-{
-    uvec3 gid = gl_GlobalInvocationID;
-#ifndef INTERLACED
-    ivec2 pos = ivec2(gid);
-#else
-    ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field);
-#endif
-
-    /* Clear luma plane */
-    imageStore(dst[0], pos, uvec4(0));
-
-    /* Clear chroma plane */
-    if (gid.x < mb_width << (4 - log2_chroma_w)) {
-        imageStore(dst[1], pos, uvec4(0));
-        imageStore(dst[2], pos, uvec4(0));
-    }
-
-    /* Alpha plane doesn't need a clear because it is not sparsely encoded */
-}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index d6f6ec8c3b..364ceb7bf3 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1089,7 +1089,7 @@ static void free_profile_data(AVHWFramesContext *hwfc)
 int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
 {
     VkFormat vkfmt = VK_FORMAT_UNDEFINED;
-    int err, dedicated_dpb;
+    int err, dedicated_dpb, num_imgs = 1;
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     AVVulkanFramesContext *hwfc = frames_ctx->hwctx;
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
@@ -1141,6 +1141,20 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
             /* mpv has issues with bgr0 mapping, so just remap it */
             frames_ctx->sw_format = AV_PIX_FMT_RGB0;
             break;
+        case AV_PIX_FMT_YUV422P10: /* ProRes needs to clear the input image, which is not possible on YUV formats */
+        case AV_PIX_FMT_YUV444P10:
+        case AV_PIX_FMT_YUV422P12:
+        case AV_PIX_FMT_YUV444P12:
+            vkfmt = VK_FORMAT_R16_UNORM;
+            num_imgs = 3;
+            break;
+        case AV_PIX_FMT_YUVA422P10: /* Ditto */
+        case AV_PIX_FMT_YUVA444P10:
+        case AV_PIX_FMT_YUVA422P12:
+        case AV_PIX_FMT_YUVA444P12:
+            vkfmt = VK_FORMAT_R16_UNORM;
+            num_imgs = 4;
+            break;
         default:
             break;
         }
@@ -1151,11 +1165,13 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
     frames_ctx->height = FFALIGN(avctx->coded_height, 1 << pdesc->log2_chroma_h);
     frames_ctx->format = AV_PIX_FMT_VULKAN;
 
-    hwfc->format[0]    = vkfmt;
-    hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
-    hwfc->usage        = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
-                         VK_IMAGE_USAGE_STORAGE_BIT      |
-                         VK_IMAGE_USAGE_SAMPLED_BIT;
+    for (int i = 0; i < num_imgs; ++i)
+        hwfc->format[i] = vkfmt;
+
+    hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+    hwfc->usage  = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                   VK_IMAGE_USAGE_STORAGE_BIT      |
+                   VK_IMAGE_USAGE_SAMPLED_BIT;
 
     if (prof) {
         FFVulkanDecodeShared *ctx;
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 90b8610817..4a0f86eb9a 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -24,7 +24,6 @@
 #include "libavutil/vulkan_spirv.h"
 
 extern const char *ff_source_common_comp;
-extern const char *ff_source_prores_reset_comp;
 extern const char *ff_source_prores_vld_comp;
 extern const char *ff_source_prores_idct_comp;
 
@@ -46,7 +45,6 @@ typedef struct ProresVulkanDecodePicture {
 } ProresVulkanDecodePicture;
 
 typedef struct ProresVulkanDecodeContext {
-    FFVulkanShader reset;
     FFVulkanShader vld;
     FFVulkanShader idct;
 
@@ -157,12 +155,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     ProresVulkanDecodeContext *pv = ctx->sd_ctx;
     ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
     FFVulkanDecodePicture     *vp = &pp->vp;
+    AVFrame                    *f = pr->frame;
+    AVVkFrame                *vkf = (AVVkFrame *)f->data[0];
 
     ProresVkParameters pd;
     FFVkBuffer *slice_data, *metadata;
     VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
     VkBufferMemoryBarrier2 buf_bar[2];
-    int nb_img_bar = 0, nb_buf_bar = 0, err;
+    int nb_img_bar = 0, nb_buf_bar = 0, nb_imgs, i, err;
     const AVPixFmtDescriptor *pix_desc;
 
     if (!pp->slice_num)
@@ -199,12 +199,11 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     RET(ff_vk_exec_start(&ctx->s, exec));
 
     /* Prepare deps */
-    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f,
                                  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
 
-    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
-                                    pr->frame));
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, f));
 
     /* Transfer ownership to the exec context */
     RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
@@ -212,11 +211,44 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->metadata_buf, 1, 0));
     pp->metadata_buf = NULL;
 
+    vkf->layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;
+    vkf->access[0] = VK_ACCESS_2_NONE;
+
+    nb_imgs = ff_vk_count_images(vkf);
+
     /* Input barrier */
-    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+    ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar,
                         VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_CLEAR_BIT,
+                        VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Clear only the first three (colour) images: the vld shader writes them sparsely; alpha is not sparsely encoded */
+    for (i = 0; i < FFMIN(nb_imgs, 3); ++i) {
+        vk->CmdClearColorImage(exec->buf, vkf->img[i],
+                               VK_IMAGE_LAYOUT_GENERAL,
+                               &((VkClearColorValue) { 0 }),
+                               1, &((VkImageSubresourceRange) {
+                                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                                   .levelCount = 1,
+                                   .layerCount = 1,
+                               }));
+    }
+
+    ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_CLEAR_BIT,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
-                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
                         VK_IMAGE_LAYOUT_GENERAL,
                         VK_QUEUE_FAMILY_IGNORED);
 
@@ -244,37 +276,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     });
     nb_img_bar = nb_buf_bar = 0;
 
-    /* Reset */
-    ff_vk_shader_update_img_array(&ctx->s, exec, &pv->reset,
-                                  pr->frame, vp->view.out,
-                                  0, 0,
-                                  VK_IMAGE_LAYOUT_GENERAL,
-                                  VK_NULL_HANDLE);
-
-    ff_vk_exec_bind_shader(&ctx->s, exec, &pv->reset);
-    ff_vk_shader_update_push_const(&ctx->s, exec, &pv->reset,
-                                   VK_SHADER_STAGE_COMPUTE_BIT,
-                                   0, sizeof(pd), &pd);
-
-    vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);
-
-    /* Input frame barrier after reset */
-    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
-                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
-                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
-                        VK_ACCESS_SHADER_WRITE_BIT,
-                        VK_IMAGE_LAYOUT_GENERAL,
-                        VK_QUEUE_FAMILY_IGNORED);
-
-    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
-        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-        .pBufferMemoryBarriers    = buf_bar,
-        .bufferMemoryBarrierCount = nb_buf_bar,
-        .pImageMemoryBarriers     = img_bar,
-        .imageMemoryBarrierCount  = nb_img_bar,
-    });
-    nb_img_bar = nb_buf_bar = 0;
-
     /* Entropy decode */
     ff_vk_shader_update_desc_buffer(&ctx->s, exec, &pv->vld,
                                     0, 0, 0,
@@ -287,7 +288,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                                     pp->mb_params_sz,
                                     VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &pv->vld,
-                                  pr->frame, vp->view.out,
+                                  f, vp->view.out,
                                   0, 2,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
@@ -301,7 +302,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                     3 + !!pr->alpha_info);
 
     /* Synchronize vld and idct shaders */
-    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+    ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                         VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
@@ -339,7 +340,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                                     pp->mb_params_sz,
                                     VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &pv->idct,
-                                  pr->frame, vp->view.out,
+                                  f, vp->view.out,
                                   0, 1,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
@@ -434,7 +435,6 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
 {
     ProresVulkanDecodeContext *pv = ctx->sd_ctx;
 
-    ff_vk_shader_free(&ctx->s, &pv->reset);
     ff_vk_shader_free(&ctx->s, &pv->vld);
     ff_vk_shader_free(&ctx->s, &pv->idct);
 
@@ -478,22 +478,6 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
 
     ctx->sd_ctx_free = vk_decode_prores_uninit;
 
-    desc_set = (FFVulkanDescriptorSetBinding []) {
-        {
-            .name       = "dst",
-            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-            .dimensions = 2,
-            .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
-            FF_VK_REP_NATIVE),
-            .mem_quali  = "writeonly",
-            .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
-            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-        },
-    };
-    RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &pv->reset,
-                    "prores_dec_reset", "main", desc_set, 1,
-                    ff_source_prores_reset_comp, 0x080801, pr->frame_type != 0));
-
     desc_set = (FFVulkanDescriptorSetBinding []) {
         {
             .name        = "slice_offsets_buf",
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to