[FFmpeg-cvslog] [ffmpeg] branch master updated. 98412edfed lavc: add a ProRes Vulkan hwaccel

ffmpeg-git--- via ffmpeg-cvslog Sat, 25 Oct 2025 12:54:57 -0700

The branch, master has been updated
       via  98412edfed56f03d6844aafd48f5891dd9d591ec (commit)
       via  3fd55d952efe421908a93d689aa0caf5523b5158 (commit)
       via  987368ef25dd601a92a5d3709985aa28c509f179 (commit)
       via  9195af77eb1be9ab350263a988069f32f085d0f2 (commit)
      from  f5f72b4f8a410ac2de7f1040859032e087bc5492 (commit)



- Log -----------------------------------------------------------------
commit 98412edfed56f03d6844aafd48f5891dd9d591ec
Author:     averne <[email protected]>
AuthorDate: Mon Jun 2 21:31:59 2025 +0200
Commit:     Lynne <[email protected]>
CommitDate: Sat Oct 25 19:54:13 2025 +0000

    lavc: add a ProRes Vulkan hwaccel
    
    Add a shader-based Apple ProRes decoder.
    It supports all codec features for profiles up to
    the 4444 XQ profile, i.e.:
    - 4:2:2 and 4:4:4 chroma subsampling
    - 10- and 12-bit component depth
    - Interlacing
    - Alpha
    
    The implementation consists of two shaders: the
    VLD kernel does entropy decoding for color/alpha,
    and the IDCT kernel performs the inverse transform
    on color components.
    
    Benchmarks for a 4k yuv422p10 sample:
    - AMD Radeon 6700XT:   178 fps
    - Intel i7 Tiger Lake: 37 fps
    - NVidia Orin Nano:    70 fps

diff --git a/configure b/configure
index ed4f8c4a94..764bbb0001 100755
--- a/configure
+++ b/configure
@@ -3343,6 +3343,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox"
 prores_videotoolbox_hwaccel_select="prores_decoder"
 prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
 prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
+prores_vulkan_hwaccel_deps="vulkan spirv_compiler"
+prores_vulkan_hwaccel_select="prores_decoder"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
 vc1_d3d11va2_hwaccel_deps="d3d11va"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6c007dda8c..f5637cee08 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1106,6 +1106,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL)         += 
vulkan_decode.o vulkan_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec.o
 OBJS-$(CONFIG_VVC_VAAPI_HWACCEL)          += vaapi_vvc.o
 OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL)  += vulkan_decode.o 
vulkan_prores_raw.o
+OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL)      += vulkan_decode.o vulkan_prores.o
 
 # Objects duplicated from other libraries for shared builds
 SHLIBOBJS                              += log2_tab.o reverse.o
@@ -1350,7 +1351,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h 
vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN)           += ffv1_vulkan.h vulkan_video.h \
+SKIPHEADERS-$(CONFIG_VULKAN)           += ffv1_vulkan.h prores_vulkan.h 
vulkan_video.h \
                                           vulkan_encode.h vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h 
v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 4b205d386e..0894d84a9c 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
 extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
+extern const struct FFHWAccel ff_prores_vulkan_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 0b6556107f..0e8ec344a2 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const 
uint8_t *buf,
     }
 
     if (pix_fmt != ctx->pix_fmt) {
-#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
+#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + 
CONFIG_PRORES_VULKAN_HWACCEL)
 #if HWACCEL_MAX
         enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
         int ret;
@@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const 
uint8_t *buf,
 
 #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
         *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
         *fmtp++ = ctx->pix_fmt;
         *fmtp = AV_PIX_FMT_NONE;
@@ -872,6 +875,9 @@ const FFCodec ff_prores_decoder = {
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
 #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
         HWACCEL_VIDEOTOOLBOX(prores),
+#endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+        HWACCEL_VULKAN(prores),
 #endif
         NULL
     },
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index d8e1471fa6..ec3015fee6 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)  +=  vulkan/common.o \
 OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
                                             vulkan/prores_raw.o
 
+OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
+                                        vulkan/prores_reset.o \
+                                        vulkan/prores_vld.o \
+                                        vulkan/prores_idct.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard 
$(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/prores_idct.comp 
b/libavcodec/vulkan/prores_idct.comp
new file mode 100644
index 0000000000..645cb02979
--- /dev/null
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -0,0 +1,123 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * Two macroblocks, padded to avoid bank conflicts.
+ * Laid out as 4*2 blocks of 8 rows, each row holding 8 coefficients plus one
+ * padding element, which is why rows are addressed with a stride of 9 below.
+ */
+shared float blocks[4*2][8*(8+1)];
+
+/**
+ * Load one texel from plane dst[tex_idx] and return its first component.
+ * When INTERLACED is defined, pos.y is remapped to image row
+ * (pos.y << 1) + bottom_field (bottom_field is presumably 0 or 1, it is
+ * declared outside this file).
+ */
+uint get_px(uint tex_idx, ivec2 pos)
+{
+#ifndef INTERLACED
+    return imageLoad(dst[tex_idx], pos).x;
+#else
+    return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + 
bottom_field)).x;
+#endif
+}
+
+/**
+ * Store v, replicated into all four components, at pos in plane
+ * dst[tex_idx]. When INTERLACED is defined, pos.y is remapped to image row
+ * (pos.y << 1) + bottom_field, mirroring get_px().
+ */
+void put_px(uint tex_idx, ivec2 pos, uint v)
+{
+#ifndef INTERLACED
+    imageStore(dst[tex_idx], pos, uvec4(v));
+#else
+    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), 
uvec4(v));
+#endif
+}
+
+/**
+ * 7.4 Inverse Transform
+ *
+ * One-dimensional 8-point inverse transform, computed in place on
+ * blocks[block]. The eight inputs are read from offset + k * stride for
+ * k = 0..7 and the eight results are written back to the same locations.
+ * main() calls it with stride 1 for a row and stride 9 for a column.
+ */
+void idct(uint block, uint offset, uint stride)
+{
+    float c0 = blocks[block][0*stride + offset];
+    float c1 = blocks[block][1*stride + offset];
+    float c2 = blocks[block][2*stride + offset];
+    float c3 = blocks[block][3*stride + offset];
+    float c4 = blocks[block][4*stride + offset];
+    float c5 = blocks[block][5*stride + offset];
+    float c6 = blocks[block][6*stride + offset];
+    float c7 = blocks[block][7*stride + offset];
+
+    /* a1..a4 are built from the even-indexed inputs only */
+    float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6);
+    float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
+
+    float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 
0.46193981170654296875;
+    float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 
0.46193981170654296875;
+
+    float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 
0.19134169816970825195;
+    float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 
0.19134169816970825195;
+
+    /* m1..m4 are built from the odd-indexed inputs only */
+    float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7;
+    float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
+
+    float tmp5 = (c5 - c7) *  1.4142134189605712891 + (c5 - c7) + (c1 - c3);
+    float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
+
+    float m1 = tmp3 *  2.6131260395050048828 + tmp5;
+    float m4 = tmp3 * -2.6131260395050048828 + tmp5;
+
+    float m2 = tmp4 *  1.0823919773101806641 + tmp6;
+    float m3 = tmp4 * -1.0823919773101806641 + tmp6;
+
+    /* Outputs k and 7 - k share the same terms and differ only in sign */
+    blocks[block][0*stride + offset] = m1 *  0.49039259552955627441  + a1;
+    blocks[block][7*stride + offset] = m1 * -0.49039259552955627441  + a1;
+    blocks[block][1*stride + offset] = m2 *  0.41573479771614074707  + a2;
+    blocks[block][6*stride + offset] = m2 * -0.41573479771614074707  + a2;
+    blocks[block][2*stride + offset] = m3 *  0.27778509259223937988  + a3;
+    blocks[block][5*stride + offset] = m3 * -0.27778509259223937988  + a3;
+    blocks[block][3*stride + offset] = m4 *  0.097545139491558074951 + a4;
+    blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
+}
+
+/**
+ * Second part of inverse quantization followed by a 2-D inverse transform,
+ * done in place on the image planes.
+ *
+ * gid.z selects the plane. block picks one of the eight shared blocks from
+ * lid.y and the upper bits of lid.x. idx (lid.x & 7) is the column this
+ * invocation loads and stores, and also the row, then the column, that it
+ * transforms. The workgroup size is declared outside this file.
+ */
+void main(void)
+{
+    uvec3 gid = gl_GlobalInvocationID, lid = gl_LocalInvocationID;
+    uint comp = gid.z, block = (lid.y << 2) | (lid.x >> 3), idx = lid.x & 0x7;
+    uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
+    /* Invocations at or past this column neither read nor write the image */
+    bool act = gid.x < mb_width << (4 - chroma_shift);
+
+    /* Coalesced load of DCT coeffs in shared memory, second part of inverse
+     * quantization */
+    if (act) {
+        /**
+         * According to spec indexing an array in push constant memory with
+         * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
+         * so copy the whole matrix locally.
+         */
+        uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+        [[unroll]] for (uint i = 0; i < 8; ++i) {
+            /* Stored coefficients are 16-bit two's complement values */
+            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | 
i))), 16);
+            blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+        }
+    }
+
+    /* The barriers and transforms are kept outside the act branch, so every
+     * invocation executes them; results of inactive ones are never stored. */
+
+    /* Row-wise iDCT */
+    barrier();
+    idct(block, idx * 9, 1);
+
+    /* Column-wise iDCT */
+    barrier();
+    idct(block, idx, 9);
+
+    /* Scale by 2^-(12 - depth), add half the sample range, clamp to depth
+     * bits */
+    float fact = 1.0f / (1 << (12 - depth)), off = 1 << (depth - 1);
+    int maxv = (1 << depth) - 1;
+
+    /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global
+     * memory */
+    barrier();
+    if (act) {
+        [[unroll]] for (uint i = 0; i < 8; ++i) {
+            float v = blocks[block][i * 9 + idx] * fact + off;
+            put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, 
maxv));
+        }
+    }
+}
diff --git a/libavcodec/vulkan/prores_reset.comp 
b/libavcodec/vulkan/prores_reset.comp
new file mode 100644
index 0000000000..51cbc6b3d9
--- /dev/null
+++ b/libavcodec/vulkan/prores_reset.comp
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * Zero one texel of the color planes per invocation.
+ *
+ * The texel is (gid.x, gid.y), or (gid.x, (gid.y << 1) + bottom_field) when
+ * INTERLACED is defined. Plane 0 is always written; planes 1 and 2 are
+ * written only while gid.x is below mb_width << (4 - log2_chroma_w). Only
+ * the horizontal extent is checked for chroma.
+ */
+void main(void)
+{
+    uvec3 gid = gl_GlobalInvocationID;
+#ifndef INTERLACED
+    ivec2 pos = ivec2(gid);
+#else
+    ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field);
+#endif
+
+    /* Clear luma plane */
+    imageStore(dst[0], pos, uvec4(0));
+
+    /* Clear chroma plane */
+    if (gid.x < mb_width << (4 - log2_chroma_w)) {
+        imageStore(dst[1], pos, uvec4(0));
+        imageStore(dst[2], pos, uvec4(0));
+    }
+
+    /* Alpha plane doesn't need a clear because it is not sparsely encoded */
+}
diff --git a/libavcodec/vulkan/prores_vld.comp 
b/libavcodec/vulkan/prores_vld.comp
new file mode 100644
index 0000000000..00e78e08ff
--- /dev/null
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -0,0 +1,317 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Constructor shorthands used to build the 8- and 16-bit constant tables */
+#define U8(x)  (uint8_t (x))
+#define U16(x) (uint16_t(x))
+
+/**
+ * Store v, replicated into all four components, at pos in plane
+ * dst[tex_idx]. When INTERLACED is defined, pos.y is remapped to image row
+ * (pos.y << 1) + bottom_field (bottom_field is presumably 0 or 1, it is
+ * declared outside this file).
+ */
+void put_px(uint tex_idx, ivec2 pos, uint v)
+{
+#ifndef INTERLACED
+    imageStore(dst[tex_idx], pos, uvec4(v));
+#else
+    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), 
uvec4(v));
+#endif
+}
+
+/**
+ * 7.5.3 Pixel Arrangement
+ *
+ * Map block index pos within a slice to the pixel offset of that block.
+ * With the operator precedence spelled out, the expression below is
+ *   x = ((pos & (-luma - 2)) + luma) >> 1,  y = (pos >> luma) & 1
+ * in units of blocks, then scaled by 8 (<< 3) to pixels.
+ * For luma == 1, four consecutive indices cover a 2x2 group of blocks row by
+ * row. For luma == 0, consecutive indices alternate between y = 0 and y = 1
+ * and x advances every second index.
+ */
+ivec2 pos_to_block(uint pos, uint luma)
+{
+    return ivec2((pos & -luma - 2) + luma >> 1, pos >> luma & 1) << 3;
+}
+
+/**
+ * 7.1.1.2 Signed Golomb Combination Codes
+ *
+ * Fold the sign stored in bit 0 back into the value: even x gives x >> 1,
+ * odd x gives ~(x >> 1), so 0, 1, 2, 3, 4 map to 0, -1, 1, -2, 2. The
+ * result is returned as a two's complement bit pattern in a uint.
+ */
+uint to_signed(uint x)
+{
+    return (x >> 1) ^ -(x & 1);
+}
+
+/**
+ * 7.1.1.1 Golomb Combination Codes
+ *
+ * Decode one codeword from gb and return its unsigned value. codebook packs
+ * three 4-bit fields: last_rice_q in bits 0-3, krice in bits 4-7 and kexp in
+ * bits 8-11. The fields are read with the signed bitfieldExtract() overload;
+ * none of the codebook constants in this file has a field above 6, so the
+ * top bit of a field is never set.
+ */
+uint decode_codeword(inout GetBitContext gb, int codebook)
+{
+    int last_rice_q = bitfieldExtract(codebook, 0, 4),
+        krice       = bitfieldExtract(codebook, 4, 4),
+        kexp        = bitfieldExtract(codebook, 8, 4);
+
+    /* Number of leading zero bits in the next 32 bits (assumes show_bits()
+     * peeks without consuming; it is defined outside this file) */
+    int q = 31 - findMSB(show_bits(gb, 32));
+    if (q <= last_rice_q) {
+        /* Golomb-Rice encoding */
+        /* Reads the q zeros, the terminating 1 and krice suffix bits; the
+         * mask clears the terminating 1 */
+        return (get_bits(gb, krice + q + 1) & ~(1 << krice)) + (q << krice);
+    } else {
+        /* exp-Golomb encoding */
+        return get_bits(gb, (q << 1) + kexp - last_rice_q) - (1 << kexp) + 
((last_rice_q + 1) << krice);
+    }
+}
+
+/**
+ * Entropy-decode the coefficients of one color component of a slice and
+ * store them, multiplied by qscale and truncated to 16 bits, into plane
+ * gid.z. Only positions that carry a coded coefficient are written.
+ *
+ * gb       bit reader for the component's coded data (passed by value, so
+ *          the caller's reader is not advanced)
+ * mb_pos   position of the slice's first macroblock, in macroblock units
+ * mb_count number of macroblocks in the slice; the block indexing below
+ *          relies on it being a power of two
+ * qscale   scale applied to every coefficient before it is stored
+ */
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+{
+    uvec3 gid = gl_GlobalInvocationID;
+    uint is_luma = uint(gid.z == 0);
+    uint chroma_shift = bool(is_luma) ? 0 : log2_chroma_w;
+
+    uint num_blocks = mb_count << (2 - chroma_shift);
+    ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4);
+
+    /* 7.1.1.3 DC Coefficients */
+    {
+        /* First coeff */
+        uint c = to_signed(decode_codeword(gb, 0x650));
+        put_px(gid.z, base_pos, c * qscale & 0xffff);
+
+        /**
+         * Table 9, encoded as
+         * (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
+         * According to the SMPTE document, abs(prev_dc_diff) should be used
+         * to index the table, duplicating the entries removes the abs
+         * operation.
+         */
+        const uint16_t dc_codebook[] = { U16(0x100),
+                                         U16(0x210), U16(0x210),
+                                         U16(0x321), U16(0x321),
+                                         U16(0x430), U16(0x430), };
+
+        uint cw = 5, prev_dc_diff = 0;
+        for (int i = 1; i < num_blocks; ++i) {
+            cw = decode_codeword(gb, dc_codebook[min(cw, 6)]);
+
+            /* s is all ones when the previous difference was negative, in
+             * which case the decoded difference is negated */
+            int s = int(prev_dc_diff) >> 31;
+            c += prev_dc_diff = (to_signed(cw) ^ s) - s;
+
+            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 
0xffff);
+        }
+    }
+
+    /* 7.1.1.4 AC Coefficients */
+    {
+        /* Table 10 */
+        const uint16_t ac_run_codebook  [] = { U16(0x102), U16(0x102), 
U16(0x101), U16(0x101),
+                                               U16(0x100), U16(0x211), 
U16(0x211), U16(0x211),
+                                               U16(0x211), U16(0x210), 
U16(0x210), U16(0x210),
+                                               U16(0x210), U16(0x210), 
U16(0x210), U16(0x320), };
+
+        /* Table 11 */
+        const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), 
U16(0x102), U16(0x100),
+                                               U16(0x210), U16(0x210), 
U16(0x210), U16(0x210),
+                                               U16(0x320) };
+
+#ifndef INTERLACED
+        /* Figure 4, encoded as (x << 0) | (y << 4) */
+        const uint8_t scan_tbl[] = {
+            U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), 
U8(0x12), U8(0x13),
+            U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), 
U8(0x32), U8(0x33),
+            U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), 
U8(0x07), U8(0x16),
+            U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), 
U8(0x36), U8(0x37),
+            U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), 
U8(0x43), U8(0x52),
+            U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), 
U8(0x45), U8(0x54),
+            U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), 
U8(0x47), U8(0x56),
+            U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), 
U8(0x76), U8(0x77),
+        };
+#else
+        /* Figure 5 */
+        const uint8_t scan_tbl[] = {
+            U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), 
U8(0x21), U8(0x31),
+            U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), 
U8(0x23), U8(0x33),
+            U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), 
U8(0x70), U8(0x61),
+            U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), 
U8(0x63), U8(0x73),
+            U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), 
U8(0x34), U8(0x25),
+            U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), 
U8(0x54), U8(0x45),
+            U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), 
U8(0x74), U8(0x65),
+            U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), 
U8(0x67), U8(0x77),
+        };
+#endif
+
+        /* pos interleaves the blocks of the slice: its low bits select the
+         * block and its high bits the position in the scan order */
+        uint block_mask  = num_blocks - 1;
+        uint block_shift = findLSB(num_blocks);
+
+        /* Start on the last DC coefficient; each run moves forward from it */
+        uint pos = num_blocks - 1, run = 4, level = 1, s;
+        while (pos < num_blocks << 6) {
+            /* Stop when no bits are left, or when fewer than 32 bits are
+             * left and all of them are zero */
+            int left = left_bits(gb);
+            if (left <= 0 || (left < 32 && show_bits(gb, left) == 0))
+                break;
+
+            run   = decode_codeword(gb, ac_run_codebook  [min(run,   15)]);
+            level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]);
+            s     = get_bits(gb, 1);
+
+            pos += run + 1;
+
+            /* A damaged stream can code a run past the last coefficient;
+             * stop before pos is used to index the 64-entry scan table or
+             * to address the plane. */
+            if (pos >= num_blocks << 6)
+                break;
+
+            uint bidx  = pos & block_mask, scan = scan_tbl[pos >> block_shift];
+            ivec2 spos = pos_to_block(bidx, is_luma);
+            ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
+
+            /* Magnitude is level + 1, negated when the sign bit s is set */
+            uint c = ((level + 1) ^ -s) + s;
+            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+        }
+    }
+}
+
+/**
+ * 7.1.2 Scanned Alpha
+ *
+ * Decode the run-length coded alpha values of one slice and store them into
+ * plane 3, rescaled to the color bit depth.
+ *
+ * gb       bit reader for the alpha data (passed by value)
+ * mb_pos   position of the slice's first macroblock, in macroblock units
+ * mb_count number of macroblocks in the slice; the position split below
+ *          relies on it being a power of two
+ */
+void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
+{
+    uvec3 gid = gl_GlobalInvocationID;
+
+    ivec2 base_pos = ivec2(mb_pos) << 4;
+    /* pos is split into x (low block_shift bits, i.e. mb_count * 16 columns)
+     * and y (remaining bits) */
+    uint block_shift = findMSB(mb_count) + 4, block_mask = (1 << block_shift) 
- 1;
+
+    /* 8 mask bits when alpha_info is 1, 16 when it is 2 */
+    uint mask = (1 << (4 << alpha_info)) - 1;
+    /* Slice width in pixels times the number of rows, at most 16, left below
+     * row gid.y << 4 */
+    uint num_values = (mb_count << 4) * min(height - (gid.y << 4), 16);
+
+    int num_cw_bits  = alpha_info == 1 ? 5 : 8,
+        num_flc_bits = alpha_info == 1 ? 9 : 17;
+
+    uint alpha_rescale_lshift = alpha_info == 1 ? depth - 8 : 16,
+         alpha_rescale_rshift = 16 - depth;
+
+    /* All ones; reduced to the alpha width by the mask once the first
+     * difference has been added */
+    uint alpha = -1;
+    for (uint pos = 0; pos < num_values;) {
+        uint diff, run;
+
+        /* Decode run value */
+        {
+            uint bits = show_bits(gb, num_cw_bits), q = num_cw_bits - 1 - 
findMSB(bits);
+
+            /* Tables 13/14 */
+            if (q != 0) {
+                /* Leading bit is 0: short code, sign in the lowest bit */
+                uint m = (bits >> 1) + 1, s = bits & 1;
+                diff = (m ^ -s) + s;
+                skip_bits(gb, num_cw_bits);
+            } else {
+                /* Leading bit is 1: fixed-length code; the leading 1 is
+                 * dropped by the mask below */
+                diff = get_bits(gb, num_flc_bits);
+            }
+
+            alpha = alpha + diff & mask;
+        }
+
+        /* Decode run length */
+        {
+            uint bits = show_bits(gb, 5), q = 4 - findMSB(bits);
+
+            /* Table 12 */
+            if (q == 0) {
+                run = 1;
+                skip_bits(gb, 1);
+            } else if (q <= 4) {
+                run = bits + 1;
+                skip_bits(gb, 5);
+            } else {
+                run = get_bits(gb, 16) + 1;
+            }
+
+            /* Never run past the end of the slice */
+            run = min(run, num_values - pos);
+        }
+
+        /**
+         * FFmpeg doesn't support color and alpha with different precision,
+         * so we need to rescale to the color range.
+         */
+        uint val = (alpha << alpha_rescale_lshift) | (alpha >> 
alpha_rescale_rshift);
+        for (uint end = pos + run; pos < end; ++pos)
+            put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), 
val & 0xffff);
+    }
+}
+
+/**
+ * Entropy-decode one component of one slice per invocation.
+ *
+ * gid.x and gid.y select the slice inside the slice grid and gid.z the
+ * component: 0 to 2 go through decode_comp(), 3 goes through
+ * decode_alpha().
+ */
+void main(void)
+{
+    uvec3 gid = gl_GlobalInvocationID;
+    if (gid.x >= slice_width || gid.y >= slice_height)
+        return;
+
+    /* The slice size is the distance to the next entry, so slice_offsets is
+     * read one entry past slice_idx */
+    uint slice_idx = gid.y * slice_width + gid.x;
+    uint slice_off  = slice_offsets[slice_idx],
+         slice_size = slice_offsets[slice_idx + 1] - slice_off;
+
+    u8buf bs = u8buf(slice_data + slice_off);
+
+    /* Decode slice header */
+    uint hdr_size, y_size, u_size, v_size, a_size;
+    hdr_size = bs[0].v >> 3;
+
+    /* Table 15 */
+    uint qidx   = clamp(bs[1].v, 1, 224),
+         qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
+
+    /* Component sizes are 16-bit values stored most significant byte first */
+    y_size = (uint(bs[2].v) << 8) | bs[3].v;
+    u_size = (uint(bs[4].v) << 8) | bs[5].v;
+
+    /**
+     * The alpha_info field can be 0 even when an alpha plane is present,
+     * if skip_alpha is enabled, so use the header size instead.
+     */
+    if (hdr_size > 6)
+        v_size = (uint(bs[6].v) << 8) | bs[7].v;
+    else
+        v_size = slice_size - hdr_size - y_size - u_size;
+
+    /* Whatever remains after the three color components; 0 when v_size was
+     * derived from the slice size above */
+    a_size = slice_size - hdr_size - y_size - u_size - v_size;
+
+    /* NOTE(review): gb stays uninitialized for gid.z > 3; presumably the
+     * dispatch never launches such invocations - confirm against the host
+     * code */
+    GetBitContext gb;
+    switch (gid.z) {
+        case 0:
+            init_get_bits(gb, u8buf(bs + hdr_size),                            
int(y_size));
+            break;
+        case 1:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size),                   
int(u_size));
+            break;
+        case 2:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size),          
int(v_size));
+            break;
+        case 3:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), 
int(a_size));
+            break;
+    }
+
+    /**
+     * Support for the grayscale "extension" in the prores_aw encoder.
+     * According to the spec, entropy coded data should never be empty,
+     * and instead contain at least the DC coefficients.
+     * This avoids undefined behavior.
+     */
+    if (left_bits(gb) == 0)
+        return;
+
+    /**
+     * 4 ProRes Frame Structure
+     * ProRes tiles pictures into a grid of slices, whose size is determined
+     * by the log2_slice_width parameter (height is always 1 MB).
+     * Each slice has a width of (1 << log2_slice_width) MBs, until the picture
+     * cannot accommodate a full one. At this point, the remaining space
+     * is recursively completed using the first smaller power of two that fits
+     * (see Figure 1).
+     * The maximum number of extra slices is 3, when log2_slice_width is 3,
+     * with sizes 4, 2 and 1 MBs.
+     * The mb_width parameter therefore also represents the number of full
+     * slices, when interpreted as a fixed-point number with log2_slice_width
+     * fractional bits.
+     */
+    uint frac      = bitfieldExtract(uint(mb_width), 0, log2_slice_width),
+         num_extra = bitCount(frac);
+
+    /* diff counts the slices to the right of this one in its row. off is 0
+     * for the num_extra rightmost slices and at least 4 for the others. */
+    uint diff = slice_width - gid.x - 1,
+         off  = max(int(diff - num_extra + 1) << 2, 0);
+
+    /* Shift binds looser than subtraction: this is
+     * findLSB((frac - diff) >> diff) */
+    uint log2_width = min(findLSB(frac - diff >> diff) + diff + off, 
log2_slice_width);
+
+    /* Addition binds tighter than shift: the mask is
+     * 0xf << (log2_width + 1), which keeps the bits of frac above
+     * log2_width */
+    uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) +
+                (frac & (0xf << log2_width + 1)),
+         mb_y = gid.y;
+    uint mb_count = 1 << log2_width;
+
+    if (gid.z < 3) {
+        /* Color entropy decoding, inverse scanning, first part of inverse
+         * quantization */
+        decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+    } else {
+        /* Alpha entropy decoding */
+        decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
+    }
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index b038d456dd..cbf2ab8194 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -26,7 +26,8 @@
 
 #define DECODER_IS_SDR(codec_id) \
     (((codec_id) == AV_CODEC_ID_FFV1) || \
-     ((codec_id) == AV_CODEC_ID_PRORES_RAW))
+     ((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
+     ((codec_id) == AV_CODEC_ID_PRORES))
 
 #if CONFIG_H264_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
 #if CONFIG_PRORES_RAW_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
 #endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_PRORES_RAW_VULKAN_HWACCEL
     &ff_vk_dec_prores_raw_desc,
 #endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+    &ff_vk_dec_prores_desc,
+#endif
 };
 
 typedef struct FFVulkanDecodeProfileData {
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
new file mode 100644
index 0000000000..2602be112b
--- /dev/null
+++ b/libavcodec/vulkan_prores.c
@@ -0,0 +1,541 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "proresdec.h"
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/vulkan.h"
+#include "libavutil/vulkan_loader.h"
+#include "libavutil/vulkan_spirv.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_prores_reset_comp;
+extern const char *ff_source_prores_vld_comp;
+extern const char *ff_source_prores_idct_comp;
+
+/* Decoder descriptor for ProRes: ties AV_CODEC_ID_PRORES to a queue with
+ * compute capability, as all decoding work is dispatched as compute shaders. */
+const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
+    .codec_id    = AV_CODEC_ID_PRORES,
+    .queue_flags = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-picture private data of the ProRes Vulkan hwaccel. */
+typedef struct ProresVulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+
+    /* Buffer of uint32_t byte offsets: one start offset per slice followed
+     * by a final end offset (written by vk_prores_decode_slice) */
+    AVBufferRef *slice_offset_buf;
+    /* Number of slices submitted so far for the current picture */
+    uint32_t slice_num;
+
+    /* Reset to 0 at the start of every picture */
+    uint32_t bitstream_start;
+    /* Running end offset of the slice data accumulated so far */
+    uint32_t bitstream_size;
+} ProresVulkanDecodePicture;
+
+/* Decoder-wide state: the compiled shaders and the slice offsets buffer pool. */
+typedef struct ProresVulkanDecodeContext {
+    /* One shader set per scan type, selected with (frame_type != 0) */
+    struct ProresVulkanShaderVariants {
+        FFVulkanShader reset; /* Dispatched first, before entropy decoding */
+        FFVulkanShader vld;   /* Entropy decoding */
+        FFVulkanShader idct;  /* Inverse transform */
+    } shaders[2]; /* Progressive/interlaced */
+
+    /* Pool the per-picture slice offset buffers are taken from */
+    AVBufferPool *slice_offset_pool;
+} ProresVulkanDecodeContext;
+
+/* Push constants passed to all three shaders. Field order and types must stay
+ * in sync with the GLSL block emitted by add_push_data(). */
+typedef struct ProresVkParameters {
+    VkDeviceAddress slice_data;  /* Device address of the slice data buffer */
+    uint32_t bitstream_size;     /* End offset of the last submitted slice */
+
+    uint16_t width;              /* avctx->width */
+    uint16_t height;             /* avctx->height */
+    uint16_t mb_width;           /* Picture width in macroblocks */
+    uint16_t mb_height;          /* Picture height in macroblocks */
+    uint16_t slice_width;        /* Number of slices per macroblock row */
+    uint16_t slice_height;       /* Number of slice rows (equals mb_height) */
+    uint8_t  log2_slice_width;   /* log2 of the maximum slice width in macroblocks */
+    uint8_t  log2_chroma_w;      /* Horizontal chroma subsampling shift */
+    uint8_t  depth;              /* avctx->bits_per_raw_sample */
+    uint8_t  alpha_info;         /* Copied from the picture header; 0 means no alpha pass */
+    uint8_t  bottom_field;       /* Non-zero when the current picture is the bottom field */
+
+    uint8_t  qmat_luma  [64];
+    uint8_t  qmat_chroma[64];
+} ProresVkParameters;
+
+/*
+ * Prepare the per-picture state before slices are submitted: optionally
+ * host-map the packet data, fetch a slice offsets buffer from the pool,
+ * prepare the output frame and reset the slice counters.
+ *
+ * Returns the result of the last call made (negative on failure).
+ */
+static int vk_prores_start_frame(AVCodecContext          *avctx,
+                                 const AVBufferRef       *buffer_ref,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{
+    ProresContext             *pr = avctx->priv_data;
+    FFVulkanDecodeContext    *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared     *ctx = dec->shared_ctx;
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    int err;
+
+    /* Host map the input slices data if supported */
+    if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
+                                  buffer_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
+
+    /* Allocate slice offsets buffer: one entry per slice, plus one for the
+     * end offset of the last slice */
+    RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
+                                &pp->slice_offset_buf,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                NULL, (pr->slice_count + 1) * sizeof(uint32_t),
+                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    /* Prepare frame to be used */
+    RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
+                                       FF_VK_REP_NATIVE, 0));
+
+    pp->slice_num = 0;
+    pp->bitstream_start = pp->bitstream_size = 0;
+
+fail:
+    return err;
+}
+
+/*
+ * Register one slice: record its start and end offsets in the slice offsets
+ * buffer, and hand its data to ff_vk_decode_add_slice() unless the packet is
+ * host-mapped.
+ *
+ * Returns 0 on success, or the negative error from ff_vk_decode_add_slice().
+ */
+static int vk_prores_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *data,
+                                  uint32_t        size)
+{
+    ProresContext             *pr = avctx->priv_data;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data;
+    FFVkBuffer *slices_buf   = vp->slices_buf ? (FFVkBuffer 
*)vp->slices_buf->data : NULL;
+
+    /* Skip picture header: with a host-mapped packet, offsets are relative to
+     * the start of the mapping, so the first slice starts where its data
+     * pointer sits inside the mapped memory */
+    if (slices_buf && slices_buf->host_ref && !pp->slice_num)
+        pp->bitstream_size = data - slices_buf->mapped_mem;
+
+    /* Entry [slice_num] is the start of this slice, entry [slice_num + 1] its
+     * end; the running size is advanced while writing the latter */
+    AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 0) * sizeof(uint32_t),
+            pp->bitstream_size);
+    AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 1) * sizeof(uint32_t),
+            pp->bitstream_size += size);
+
+    if (!slices_buf || !slices_buf->host_ref) {
+        /* Not host-mapped: ff_vk_decode_add_slice() is given &pp->slice_num
+         * and is relied upon to advance it */
+        int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                         &pp->slice_num, NULL);
+        if (err < 0)
+            return err;
+    } else {
+        pp->slice_num++;
+    }
+
+    return 0;
+}
+
+/*
+ * Record and submit the decoding work for the current picture: a reset pass,
+ * the entropy decoding (VLD) pass and the inverse transform (IDCT) pass, each
+ * separated by an image barrier on the output frame.
+ *
+ * Returns 0 immediately when no slice was submitted, AVERROR(EINVAL) when the
+ * software pixel format has no descriptor, otherwise the result of the last
+ * call made (negative on failure).
+ */
+static int vk_prores_end_frame(AVCodecContext *avctx)
+{
+    ProresContext             *pr = avctx->priv_data;
+    FFVulkanDecodeContext    *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared     *ctx = dec->shared_ctx;
+    FFVulkanFunctions         *vk = &ctx->s.vkfn;
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    ProresVkParameters pd;
+    FFVkBuffer *slice_data, *slice_offsets;
+    struct ProresVulkanShaderVariants *shaders;
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+    VkBufferMemoryBarrier2 buf_bar[2];
+    int nb_img_bar = 0, nb_buf_bar = 0, err;
+    const AVPixFmtDescriptor *pix_desc;
+
+    /* Nothing to decode */
+    if (!pp->slice_num)
+        return 0;
+
+    pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!pix_desc)
+        return AVERROR(EINVAL);
+
+    slice_data    = (FFVkBuffer *)vp->slices_buf->data;
+    slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+
+    /* Index 0 holds the progressive shaders, index 1 the interlaced ones */
+    shaders = &pv->shaders[pr->frame_type != 0];
+
+    pd = (ProresVkParameters) {
+        .slice_data       = slice_data->address,
+        .bitstream_size   = pp->bitstream_size,
+
+        .width            = avctx->width,
+        .height           = avctx->height,
+        .mb_width         = pr->mb_width,
+        .mb_height        = pr->mb_height,
+        .slice_width      = pr->slice_count / pr->mb_height,
+        .slice_height     = pr->mb_height,
+        .log2_slice_width = av_log2(pr->slice_mb_width),
+        .log2_chroma_w    = pix_desc->log2_chroma_w,
+        .depth            = avctx->bits_per_raw_sample,
+        .alpha_info       = pr->alpha_info,
+        .bottom_field     = pr->first_field ^ (pr->frame_type == 1),
+    };
+
+    memcpy(pd.qmat_luma,   pr->qmat_luma,   sizeof(pd.qmat_luma  ));
+    memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    RET(ff_vk_exec_start(&ctx->s, exec));
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    pr->frame));
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
+                               (AVBufferRef *[]){ vp->slices_buf, 
pp->slice_offset_buf },
+                               2, 0));
+
+    /* Transfer ownership to the exec context */
+    vp->slices_buf = pp->slice_offset_buf = NULL;
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Reset */
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset,
+                                  pr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset);
+
+    /* Two workgroups per macroblock in each direction */
+    vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);
+
+    /* Input frame barrier after reset */
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Entropy decode: binding 0 is the slice offsets buffer (slice_num + 1
+     * entries), binding 1 the output planes */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+                                    0, 0, 0,
+                                    slice_offsets,
+                                    0, (pp->slice_num + 1) * sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
+                                  pr->frame, vp->view.out,
+                                  0, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld);
+
+    /* x/y cover slices per row and slice rows in groups of 8; z has one layer
+     * per color component, plus one for alpha when present */
+    vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 
3), AV_CEIL_RSHIFT(pr->mb_height, 3),
+                    3 + !!pr->alpha_info);
+
+    /* Synchronize vld and idct shaders */
+    nb_img_bar = 0;
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Inverse transform */
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
+                                  pr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* One workgroup per pair of macroblocks horizontally, one z layer per
+     * color component (alpha is not transformed) */
+    vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 
3);
+
+    RET(ff_vk_exec_submit(&ctx->s, exec));
+
+fail:
+    return err;
+}
+
+/*
+ * Append the GLSL push constant block to the shader source and register a
+ * push constant range of sizeof(ProresVkParameters) for the compute stage.
+ * The GLSL members must mirror the fields of ProresVkParameters.
+ *
+ * Returns the result of ff_vk_shader_add_push_const().
+ */
+static int add_push_data(FFVulkanShader *shd)
+{
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+    GLSLC(1,    u8buf    slice_data;                               );
+    GLSLC(1,    uint     bitstream_size;                           );
+    GLSLC(0,                                                       );
+    GLSLC(1,    uint16_t width;                                    );
+    GLSLC(1,    uint16_t height;                                   );
+    GLSLC(1,    uint16_t mb_width;                                 );
+    GLSLC(1,    uint16_t mb_height;                                );
+    GLSLC(1,    uint16_t slice_width;                              );
+    GLSLC(1,    uint16_t slice_height;                             );
+    GLSLC(1,    uint8_t  log2_slice_width;                         );
+    GLSLC(1,    uint8_t  log2_chroma_w;                            );
+    GLSLC(1,    uint8_t  depth;                                    );
+    GLSLC(1,    uint8_t  alpha_info;                               );
+    GLSLC(1,    uint8_t  bottom_field;                             );
+    GLSLC(0,                                                       );
+    GLSLC(1,    uint8_t  qmat_luma  [8*8];                         );
+    GLSLC(1,    uint8_t  qmat_chroma[8*8];                         );
+    GLSLC(0, };                                                    );
+
+    return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters),
+                                       VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/**
+ * Assemble, compile, link and register one compute shader.
+ *
+ * The final source is, in order: the common code, the push constant block,
+ * the descriptor set declarations, an optional "#define INTERLACED" and the
+ * shader's own source.
+ *
+ * @param descs       descriptor bindings of set 0
+ * @param num_descs   number of entries in descs
+ * @param source      GLSL source of the shader body
+ * @param local_size  workgroup size packed as 0xXXYYZZ, one byte per dimension
+ * @param interlaced  non-zero to build the interlaced variant
+ * @return a non-negative value on success, a negative AVERROR code otherwise
+ */
+static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
+                       FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                       FFVulkanShader *shd, const char *name,
+                       const char *entrypoint,
+                       FFVulkanDescriptorSetBinding *descs, int num_descs,
+                       const char *source, int local_size, int interlaced)
+{
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    int err;
+
+    RET(ff_vk_shader_init(s, shd, name,
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          local_size >> 16 & 0xff,
+                          local_size >>  8 & 0xff,
+                          local_size >>  0 & 0xff,
+                          0));
+
+    /* Common code */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constants layout */
+    RET(add_push_data(shd));
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
+
+    if (interlaced)
+        av_bprintf(&shd->src, "#define INTERLACED\n");
+
+    /* Main code */
+    GLSLD(source);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, entrypoint,
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    /* Propagate failures: returning 0 here would make callers carry on with
+     * a shader that was never compiled, linked or registered */
+    return err;
+}
+
+/*
+ * Free the ProRes-specific decoder state: every shader of both variants, the
+ * slice offsets buffer pool and the context structure itself.
+ * Installed as ctx->sd_ctx_free by vk_decode_prores_init().
+ */
+static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
+{
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) {
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].reset);
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].vld);
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct);
+    }
+
+    av_buffer_pool_uninit(&pv->slice_offset_pool);
+
+    /* Only the local pointer is cleared; ctx->sd_ctx is left as is */
+    av_freep(&pv);
+}
+
+/**
+ * Initialize the ProRes Vulkan hwaccel: set up the shared decode context,
+ * allocate the ProRes-specific state and build the reset, VLD and IDCT
+ * shaders in both their progressive and interlaced variants.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int vk_decode_prores_init(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext        *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared         *ctx = NULL;
+
+    AVHWFramesContext *out_frames_ctx;
+    ProresVulkanDecodeContext *pv;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc_set;
+    int max_num_slices, i, err;
+
+    /* Upper bound on the slice count: one slice per 16x16 macroblock.
+     * Round up, as the picture header also rounds macroblock counts up for
+     * dimensions that are not a multiple of 16. */
+    max_num_slices = AV_CEIL_RSHIFT(avctx->coded_width,  4) *
+                     AV_CEIL_RSHIFT(avctx->coded_height, 4);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on, leave through the fail label so that the SPIR-V compiler
+     * is always released */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    pv = ctx->sd_ctx = av_mallocz(sizeof(*pv));
+    if (!pv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+    ctx->sd_ctx_free = vk_decode_prores_uninit;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */
+        struct ProresVulkanShaderVariants *shaders = &pv->shaders[i];
+
+        /* Reset shader: only writes to the output planes, 8x8 workgroups */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .mem_quali  = "writeonly",
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset,
+                        "prores_dec_reset", "main", desc_set, 1,
+                        ff_source_prores_reset_comp, 0x080801, i));
+
+        /* VLD shader: reads the slice offsets, writes to the output planes,
+         * 8x8 workgroups */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name        = "slice_offsets_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "readonly",
+                .buf_content = "uint32_t slice_offsets",
+                .buf_elems   = max_num_slices + 1,
+            },
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .mem_quali  = "writeonly",
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
+                        "prores_dec_vld", "main", desc_set, 2,
+                        ff_source_prores_vld_comp, 0x080801, i));
+
+        /* IDCT shader: no memory qualifier on the output planes, 32x2
+         * workgroups */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
+                        "prores_dec_idct", "main", desc_set, 1,
+                        ff_source_prores_idct_comp, 0x200201, i));
+    }
+
+    err = 0;
+
+fail:
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/*
+ * Release the per-picture private data.
+ *
+ * The slice offsets buffer is normally handed over to the exec context by
+ * vk_prores_end_frame(), which then clears the pointer. It is still owned by
+ * the picture if end_frame returned early or failed before that point, so it
+ * must be released here as well (a no-op when already NULL).
+ */
+static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext    *dev_ctx = _hwctx.nc;
+    ProresVulkanDecodePicture *pp = data;
+
+    ff_vk_decode_free_frame(dev_ctx, &pp->vp);
+
+    av_buffer_unref(&pp->slice_offset_buf);
+}
+
+/* Hwaccel registration: the ProRes-specific callbacks are defined in this
+ * file, the remaining ones are the generic ff_vk_* Vulkan decode helpers. */
+const FFHWAccel ff_prores_vulkan_hwaccel = {
+    .p.name                = "prores_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_PRORES,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_prores_start_frame,
+    .decode_slice          = &vk_prores_decode_slice,
+    .end_frame             = &vk_prores_end_frame,
+    .free_frame_priv       = &vk_prores_free_frame_priv,
+    .frame_priv_data_size  = sizeof(ProresVulkanDecodePicture),
+    .init                  = &vk_decode_prores_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};

commit 3fd55d952efe421908a93d689aa0caf5523b5158
Author:     averne <[email protected]>
AuthorDate: Tue Aug 12 14:31:00 2025 +0200
Commit:     Lynne <[email protected]>
CommitDate: Sat Oct 25 19:54:13 2025 +0000

    avcodec/proresdec: save slice width parameter in codec context
    
    Save the log2_desired_slice_size_in_mb syntax
    element in the codec context.
    Required by the Vulkan hwaccel to compute slice
    widths and positions.

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 578d87f815..0b6556107f 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -335,6 +335,9 @@ static int decode_picture_header(AVCodecContext *avctx, 
const uint8_t *buf, cons
         return AVERROR_INVALIDDATA;
     }
 
+    ctx->slice_mb_width  = 1 << log2_slice_mb_width;
+    ctx->slice_mb_height = 1 << log2_slice_mb_height;
+
     ctx->mb_width  = (avctx->width  + 15) >> 4;
     if (ctx->frame_type)
         ctx->mb_height = (avctx->height + 31) >> 5;
@@ -344,7 +347,7 @@ static int decode_picture_header(AVCodecContext *avctx, 
const uint8_t *buf, cons
     // QT ignores the written value
     // slice_count = AV_RB16(buf + 5);
     slice_count = ctx->mb_height * ((ctx->mb_width >> log2_slice_mb_width) +
-                                    av_popcount(ctx->mb_width & (1 << 
log2_slice_mb_width) - 1));
+                                    av_popcount(ctx->mb_width & 
ctx->slice_mb_width - 1));
 
     if (ctx->slice_count != slice_count || !ctx->slices) {
         av_freep(&ctx->slices);
@@ -367,7 +370,7 @@ static int decode_picture_header(AVCodecContext *avctx, 
const uint8_t *buf, cons
     index_ptr = buf + hdr_size;
     data_ptr  = index_ptr + slice_count*2;
 
-    slice_mb_count = 1 << log2_slice_mb_width;
+    slice_mb_count = ctx->slice_mb_width;
     mb_x = 0;
     mb_y = 0;
 
@@ -392,7 +395,7 @@ static int decode_picture_header(AVCodecContext *avctx, 
const uint8_t *buf, cons
 
         mb_x += slice_mb_count;
         if (mb_x == ctx->mb_width) {
-            slice_mb_count = 1 << log2_slice_mb_width;
+            slice_mb_count = ctx->slice_mb_width;
             mb_x = 0;
             mb_y++;
         }
diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h
index d15e5b2c1d..d33eab149b 100644
--- a/libavcodec/proresdec.h
+++ b/libavcodec/proresdec.h
@@ -52,6 +52,8 @@ typedef struct {
     int slice_count;             ///< number of slices in the current picture
     unsigned mb_width;           ///< width of the current picture in mb
     unsigned mb_height;          ///< height of the current picture in mb
+    unsigned slice_mb_width;     ///< maximum width of a slice in mb
+    unsigned slice_mb_height;    ///< maximum height of a slice in mb
     uint8_t progressive_scan[64];
     uint8_t interlaced_scan[64];
     const uint8_t *scan;

commit 987368ef25dd601a92a5d3709985aa28c509f179
Author:     averne <[email protected]>
AuthorDate: Thu Jun 12 19:31:44 2025 +0200
Commit:     Lynne <[email protected]>
CommitDate: Sat Oct 25 19:54:13 2025 +0000

    avcodec/prores: adapt hwaccel code for slice-based accelerators
    
    In preparation for the Vulkan hwaccel.
    The existing hwaccel code was designed around
    videotoolbox, which ingests the whole frame
    bitstream including picture headers.
    This adapts the code to accommodate lower-level,
    slice-based hwaccels.

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 6fc8671d4d..578d87f815 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -756,6 +756,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     int frame_hdr_size, pic_size, ret;
+    int i;
 
     if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) {
         av_log(avctx, AV_LOG_ERROR, "invalid frame header\n");
@@ -786,28 +787,30 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
 
     ff_thread_finish_setup(avctx);
 
+ decode_picture:
+    pic_size = decode_picture_header(avctx, buf, buf_size);
+    if (pic_size < 0) {
+        av_log(avctx, AV_LOG_ERROR, "error decoding picture header\n");
+        return pic_size;
+    }
+
     if (HWACCEL_MAX && avctx->hwaccel) {
         const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
+
         ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, 
avpkt->size);
         if (ret < 0)
             return ret;
-        ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
-        if (ret < 0)
-            return ret;
+
+        for (i = 0; i < ctx->slice_count; ++i) {
+            ret = hwaccel->decode_slice(avctx, ctx->slices[i].data, 
ctx->slices[i].data_size);
+            if (ret < 0)
+                return ret;
+        }
+
         ret = hwaccel->end_frame(avctx);
         if (ret < 0)
             return ret;
-        goto finish;
-    }
-
- decode_picture:
-    pic_size = decode_picture_header(avctx, buf, buf_size);
-    if (pic_size < 0) {
-        av_log(avctx, AV_LOG_ERROR, "error decoding picture header\n");
-        return pic_size;
-    }
-
-    if ((ret = decode_picture(avctx)) < 0) {
+    } else if ((ret = decode_picture(avctx)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "error decoding picture\n");
         return ret;
     }
@@ -820,7 +823,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
         goto decode_picture;
     }
 
-finish:
     av_refstruct_unref(&ctx->hwaccel_last_picture_private);
 
     *got_frame      = 1;
diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index ccba249140..2cd22cba1a 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -1161,16 +1161,21 @@ static int 
videotoolbox_prores_start_frame(AVCodecContext *avctx,
                                            const uint8_t *buffer,
                                            uint32_t size)
 {
-    return 0;
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    ProresContext *ctx = avctx->priv_data;
+
+    /* Videotoolbox decodes both fields simultaneously */
+    if (!ctx->first_field)
+        return 0;
+
+    return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
 }
 
 static int videotoolbox_prores_decode_slice(AVCodecContext *avctx,
                                           const uint8_t *buffer,
                                           uint32_t size)
 {
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-
-    return ff_videotoolbox_buffer_copy(vtctx, buffer, size);
+    return 0;
 }
 
 static int videotoolbox_prores_end_frame(AVCodecContext *avctx)
@@ -1178,6 +1183,9 @@ static int videotoolbox_prores_end_frame(AVCodecContext 
*avctx)
     ProresContext *ctx = avctx->priv_data;
     AVFrame *frame = ctx->frame;
 
+    if (!ctx->first_field)
+        return 0;
+
     return ff_videotoolbox_common_end_frame(avctx, frame);
 }
 

commit 9195af77eb1be9ab350263a988069f32f085d0f2
Author:     averne <[email protected]>
AuthorDate: Thu Jun 12 19:28:53 2025 +0200
Commit:     Lynne <[email protected]>
CommitDate: Sat Oct 25 19:54:13 2025 +0000

    proresdec: allocate private memory for hwaccel pictures
    
    In preparation for the Vulkan hwaccel, which
    stores per-frame acceleration structures.

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index deaf84bda0..6fc8671d4d 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -777,6 +777,13 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
 
     if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
         return ret;
+
+    av_refstruct_unref(&ctx->hwaccel_last_picture_private);
+    FFSWAP(void *, ctx->hwaccel_picture_private, 
ctx->hwaccel_last_picture_private);
+
+    if ((ret = ff_hwaccel_frame_priv_alloc(avctx, 
&ctx->hwaccel_picture_private)) < 0)
+        return ret;
+
     ff_thread_finish_setup(avctx);
 
     if (HWACCEL_MAX && avctx->hwaccel) {
@@ -814,6 +821,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame 
*frame,
     }
 
 finish:
+    av_refstruct_unref(&ctx->hwaccel_last_picture_private);
+
     *got_frame      = 1;
 
     return avpkt->size;
@@ -824,6 +833,8 @@ static av_cold int decode_close(AVCodecContext *avctx)
     ProresContext *ctx = avctx->priv_data;
 
     av_freep(&ctx->slices);
+    av_refstruct_unref(&ctx->hwaccel_picture_private);
+    av_refstruct_unref(&ctx->hwaccel_last_picture_private);
 
     return 0;
 }
diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h
index 230fca41f2..d15e5b2c1d 100644
--- a/libavcodec/proresdec.h
+++ b/libavcodec/proresdec.h
@@ -44,6 +44,7 @@ typedef struct {
     BlockDSPContext bdsp;
     ProresDSPContext prodsp;
     AVFrame *frame;
+    void *hwaccel_picture_private, *hwaccel_last_picture_private;
     int frame_type;              ///< 0 = progressive, 1 = tff, 2 = bff
     uint8_t qmat_luma[64];
     uint8_t qmat_chroma[64];

-----------------------------------------------------------------------

Summary of changes:
 configure                                          |   2 +
 libavcodec/Makefile                                |   3 +-
 libavcodec/hwaccels.h                              |   1 +
 libavcodec/proresdec.c                             |  60 ++-
 libavcodec/proresdec.h                             |   3 +
 libavcodec/videotoolbox.c                          |  16 +-
 libavcodec/vulkan/Makefile                         |   5 +
 libavcodec/vulkan/prores_idct.comp                 | 123 +++++
 libavcodec/{qsv_api.c => vulkan/prores_reset.comp} |  34 +-
 libavcodec/vulkan/prores_vld.comp                  | 317 ++++++++++++
 libavcodec/vulkan_decode.c                         |   9 +-
 libavcodec/vulkan_prores.c                         | 541 +++++++++++++++++++++
 12 files changed, 1070 insertions(+), 44 deletions(-)
 create mode 100644 libavcodec/vulkan/prores_idct.comp
 copy libavcodec/{qsv_api.c => vulkan/prores_reset.comp} (62%)
 create mode 100644 libavcodec/vulkan/prores_vld.comp
 create mode 100644 libavcodec/vulkan_prores.c


hooks/post-receive
-- 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] branch master updated. 98412edfed lavc: add a ProRes Vulkan hwaccel

Reply via email to