This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 713f191c241a0b5c3b7a96a3d0efe3ad7faa9e3a
Author:     Lynne <[email protected]>
AuthorDate: Wed May 27 04:02:37 2026 +0900
Commit:     Lynne <[email protected]>
CommitDate: Wed Jun 3 14:12:50 2026 +0900

    vulkan_ffv1: add Bayer decoder
    
    Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1_vulkan.c                           |  9 +++
 libavcodec/vulkan/Makefile                         |  4 +-
 libavcodec/vulkan/ffv1_common.glsl                 |  2 +
 libavcodec/vulkan/ffv1_dec.comp.glsl               | 76 +++++++++++++++++++++-
 ..._dec_rgb.comp.glsl => ffv1_dec_bayer.comp.glsl} |  1 +
 ...b.comp.glsl => ffv1_dec_bayer_golomb.comp.glsl} |  2 +
 libavcodec/vulkan/ffv1_dec_setup.comp.glsl         |  2 +-
 libavcodec/vulkan/ffv1_vlc.glsl                    |  2 +-
 libavcodec/vulkan_ffv1.c                           | 33 ++++++++--
 9 files changed, 118 insertions(+), 13 deletions(-)

diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c
index 73c2b2a7ce..81843b1701 100644
--- a/libavcodec/ffv1_vulkan.c
+++ b/libavcodec/ffv1_vulkan.c
@@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, FFV1Context *f,
     }
 
     int bits = desc->comp[0].depth;
+    /* Bayer pixfmts report misleading per-component depth in comp[0].depth
+     * (it counts the fraction of bits each component contributes per output
+     * pixel, not the per-sample bit width). Use bits_per_raw_sample. The
+     * encoder fills f->bits_per_raw_sample directly; the decoder only
+     * fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field
+     * with the avctx field as a fallback so this works from both sides. */
+    if (f->bayer)
+        bits = f->bits_per_raw_sample ? f->bits_per_raw_sample
+                                      : f->avctx->bits_per_raw_sample;
     SPEC_LIST_ADD(sl,  5, 32, (uint32_t)(1ULL << bits));
     SPEC_LIST_ADD(sl,  6, 32, f->colorspace);
     SPEC_LIST_ADD(sl,  7, 32, f->transparency);
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index f86931727d..0425548978 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
                                       vulkan/ffv1_dec_rgb.comp.spv.o \
                                       vulkan/ffv1_dec_rgb_golomb.comp.spv.o \
                                       vulkan/ffv1_dec_rgb_float.comp.spv.o \
-                                      vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o
+                                      vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \
+                                      vulkan/ffv1_dec_bayer.comp.spv.o \
+                                      vulkan/ffv1_dec_bayer_golomb.comp.spv.o
 
 OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += vulkan/prores_ks_alpha_data.comp.spv.o \
                                            vulkan/prores_ks_slice_data.comp.spv.o \
diff --git a/libavcodec/vulkan/ffv1_common.glsl b/libavcodec/vulkan/ffv1_common.glsl
index 3d3b6753c6..36bce88a4a 100644
--- a/libavcodec/vulkan/ffv1_common.glsl
+++ b/libavcodec/vulkan/ffv1_common.glsl
@@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer slice_ctx_buf {
 uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
 {
     uint mpw = 1 << chroma_shift;
+    if (colorspace == 2)
+        mpw = max(mpw, 2u);
     uint awidth = align(width, mpw);
 
     if ((version < 4) || ((version == 4) && (micro_version < 3)))
diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl
index 82835e8f92..9ad2ec2442 100644
--- a/libavcodec/vulkan/ffv1_dec.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec.comp.glsl
@@ -247,6 +247,43 @@ void decode_line(ivec2 sp, int w,
 }
 #endif
 
+#ifdef BAYER
+void writeout_bayer(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y)
+{
+    memoryBarrierImage();
+    barrier();
+
+    int offset = rct_offset;
+
+    for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {
+        ivec2 lpos = sp + LADDR(ivec2(x, y));
+        ivec2 pos  = sc.slice_pos + ivec2(int(x) << 1, y << 1);
+
+        int g_r = int(imageLoad(dec[0], lpos)[0]);
+        int g_b = int(imageLoad(dec[1], lpos)[0]);
+        int b   = int(imageLoad(dec[2], lpos)[0]);
+        int r   = int(imageLoad(dec[3], lpos)[0]);
+
+        if (sc.slice_coding_mode != 1) {
+            b -= offset;
+            r -= offset;
+            g_r -= (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2;
+            b += g_r;
+            r += g_r;
+
+            int gd = g_b - offset;
+            g_b = g_r - (gd >> 1);
+            g_r = g_b + gd;
+        }
+
+        imageStore(dst[0], pos + ivec2(0, 0), uvec4(r));
+        imageStore(dst[0], pos + ivec2(1, 0), uvec4(g_r));
+        imageStore(dst[0], pos + ivec2(0, 1), uvec4(g_b));
+        imageStore(dst[0], pos + ivec2(1, 1), uvec4(b));
+    }
+}
+#endif
+
 #ifdef RGB
 ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset)
 {
@@ -319,14 +356,32 @@ void decode_slice(in SliceContext sc, uint slice_idx)
     ivec2 sp = sc.slice_pos;
     u16vec4 bits = get_slice_bits(sc);
 
-#ifdef RGB
+#ifdef BAYER
+    /* Bayer logical dims: 2x2 blocks at half resolution */
+    w >>= 1;
+    int bayer_h = sc.slice_dim.y >> 1;
+    sp.x >>= 1;
+    sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
+    /* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use
+     * raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. */
+    if (sc.slice_coding_mode == 0)
+        bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits);
+    else
+        bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1);
+#elif defined(RGB)
     sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
 #endif
 
 #ifndef GOLOMB
     /* PCM coding */
     if (sc.slice_coding_mode == 1) {
-#ifdef RGB
+#ifdef BAYER
+        for (int y = 0; y < bayer_h; y++) {
+            for (int p = 0; p < 4; p++)
+                decode_line_pcm(sp, w, y, p);
+            writeout_bayer(slice_idx, sc, sp, w, y);
+        }
+#elif defined(RGB)
         for (int y = 0; y < sc.slice_dim.y; y++) {
             for (int p = 0; p < color_planes; p++)
                 decode_line_pcm(sp, w, y, p);
@@ -347,16 +402,31 @@ void decode_slice(in SliceContext sc, uint slice_idx)
     }
 #endif
 
+#ifdef BAYER
+    u8vec4 quant_table_idx = sc.quant_table_idx.xzyy;
+    u32vec4 slice_state_off = (slice_idx*codec_planes +
+                               uvec4(0, 2, 1, 1))*plane_state_size;
+#else
     u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
     u32vec4 slice_state_off = (slice_idx*codec_planes +
                                uvec4(0, 1, 1, 2))*plane_state_size;
+#endif
 
 #ifdef GOLOMB
     slice_state_off >>= 3; // division by VLC_STATE_SIZE
     golomb_init();
 #endif
 
-#ifdef RGB
+#ifdef BAYER
+    int run_index = 0;
+    for (int y = 0; y < bayer_h; y++) {
+        for (int p = 0; p < 4; p++)
+            decode_line(sp, w, y, p, bits[p],
+                        slice_state_off[p], quant_table_idx[p], run_index);
+
+        writeout_bayer(slice_idx, sc, sp, w, y);
+    }
+#elif defined(RGB)
     int run_index = 0;
     for (int y = 0; y < sc.slice_dim.y; y++) {
         for (int p = 0; p < color_planes; p++)
diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
similarity index 98%
copy from libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
copy to libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
index 72dc31ba15..6ceb15a35a 100644
--- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
@@ -27,4 +27,5 @@
 layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
 
 #define RGB
+#define BAYER
 #include "ffv1_dec.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl
similarity index 97%
copy from libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
copy to libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl
index 72dc31ba15..ca8b7bada0 100644
--- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl
@@ -27,4 +27,6 @@
 layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
 
 #define RGB
+#define BAYER
+#define GOLOMB
 #include "ffv1_dec.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
index ff57c57dc3..d000116012 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
@@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext sc)
     if (version >= 4) {
         sc.slice_reset_contexts = get_rac(rc_state[0]);
         sc.slice_coding_mode = get_usymbol(0);
-        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+        if (sc.slice_coding_mode != 1 && colorspace != 0) {
             sc.slice_rct_coef.g = int(get_usymbol(0));
             sc.slice_rct_coef.r = int(get_usymbol(0));
             if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4)
diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl
index f362d3afbb..51f3d7ddae 100644
--- a/libavcodec/vulkan/ffv1_vlc.glsl
+++ b/libavcodec/vulkan/ffv1_vlc.glsl
@@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v)
     int drift = state.drift;
     int count = state.count;
     int bias = state.bias;
-    state.error_sum += uint16_t(abs(v));
+    state.error_sum += uint32_t(abs(v));
     drift           += v;
 
     if (count == 128) { // FIXME: variable
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 4056f3958a..e8dafb2505 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len;
 extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[];
 extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len;
 
+extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[];
+extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len;
+
+extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[];
+extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len;
+
 const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
     .codec_id         = AV_CODEC_ID_FFV1,
     .queue_flags      = VK_QUEUE_COMPUTE_BIT,
@@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
         nb_img_bar = 0;
         nb_buf_bar = 0;
 
-        for (int i = 0; i < color_planes; i++)
+        /* The intermediate frame has 4 planes (GBRAP16/32). Clear all of
+         * them since the bayer decoder uses all four. */
+        int n_dec_planes = f->bayer ? 4 : color_planes;
+        for (int i = 0; i < n_dec_planes; i++)
             vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL,
                                    &((VkClearColorValue) { 0 }),
                                    1, &((VkImageSubresourceRange) {
@@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                                       1, 5,
                                       VK_IMAGE_LAYOUT_GENERAL,
                                       VK_NULL_HANDLE);
-    if (fltmap_buf)
+    if (fltmap_buf && !f->bayer)
         ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                         1, 6, 0,
                                         fltmap_buf,
@@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
                               FFVkExecPool *pool, FFVulkanShader *shd,
                               AVHWFramesContext *dec_frames_ctx,
                               AVHWFramesContext *out_frames_ctx,
-                              VkSpecializationInfo *sl, int ac, int rgb)
+                              VkSpecializationInfo *sl, int ac, int rgb,
+                              int bayer)
 {
     int err;
 
@@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
         },
     };
     ff_vk_shader_add_descriptor_set(s, shd, desc_set,
-                                    5 + rgb + (f->micro_version >= 9),
+                                    5 + rgb + (!bayer && f->micro_version >= 9),
                                     0, 0);
 
-    if (f->version >=4 && f->micro_version >= 9) {
+    if (bayer) {
+        if (ac == AC_GOLOMB_RICE)
+            ff_vk_shader_link(s, shd,
+                              ff_ffv1_dec_bayer_golomb_comp_spv_data,
+                              ff_ffv1_dec_bayer_golomb_comp_spv_len, "main");
+        else
+            ff_vk_shader_link(s, shd,
+                              ff_ffv1_dec_bayer_comp_spv_data,
+                              ff_ffv1_dec_bayer_comp_spv_len, "main");
+    } else if (f->version >=4 && f->micro_version >= 9) {
         if (ac == AC_GOLOMB_RICE)
             ff_vk_shader_link(s, shd,
                               ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
@@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
     FFv1VulkanDecodeContext *fv;
 
     if (f->version < 3 ||
-        (f->version == 4 && f->micro_version >= 10))
+        (f->version == 4 && f->micro_version >= 10 && !f->bayer))
         return AVERROR(ENOTSUP);
 
     /* Streams with a low amount of slices will usually be much slower
@@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
 
     /* Decode shaders */
     RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
-                           dctx, hwfc, sl, f->ac, is_rgb));
+                           dctx, hwfc, sl, f->ac, is_rgb, f->bayer));
 
     /* Init static data */
     RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f));

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to