PR #23268 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23268
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23268.patch

Depends/based on #22528
Implements encoding and decoding of Bayer data in the Vulkan FFv1 decoder and 
encoder.


From ff8eafa85b46dabb4d48d90f5e1087181aeb70fb Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 26 May 2026 11:38:29 +0900
Subject: [PATCH 1/8] vulkan/ffv1: add 32-bit float RGB encoding and a rice +
 remap path

This implements 32-bit float RGB encoding, bringing the Vulkan implementation
on par with the C implementation.

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1_vulkan.h                      |   1 +
 libavcodec/ffv1enc_vulkan.c                   | 153 ++++++++++++++++-
 libavcodec/vulkan/Makefile                    |   4 +-
 libavcodec/vulkan/ffv1_common.glsl            |   1 +
 libavcodec/vulkan/ffv1_enc.comp.glsl          |  27 ++-
 .../ffv1_enc_rgb_float_golomb.comp.glsl       |  33 ++++
 libavcodec/vulkan/ffv1_enc_setup.comp.glsl    | 127 +++++++++++++-
 libavcodec/vulkan/ffv1_enc_sort32.comp.glsl   | 155 ++++++++++++++++++
 8 files changed, 477 insertions(+), 24 deletions(-)
 create mode 100644 libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl
 create mode 100644 libavcodec/vulkan/ffv1_enc_sort32.comp.glsl

diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
index 9a206afaca..d6ae0f3fee 100644
--- a/libavcodec/ffv1_vulkan.h
+++ b/libavcodec/ffv1_vulkan.h
@@ -48,6 +48,7 @@ typedef struct FFv1ShaderParams {
     int sar[2];
     int pic_mode;
     uint32_t slice_size_max;
+    uint32_t max_pixels_per_slice;
 } FFv1ShaderParams;
 
 #endif /* AVCODEC_FFV1_VULKAN_H */
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 92d46f7ddf..7c22ced785 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -72,6 +72,7 @@ typedef struct VulkanEncodeFFv1Context {
 
     FFVulkanShader rct_search;
     FFVulkanShader remap;
+    FFVulkanShader sort32;
     FFVulkanShader setup;
     FFVulkanShader reset;
     FFVulkanShader enc;
@@ -101,6 +102,8 @@ typedef struct VulkanEncodeFFv1Context {
     int optimize_rct;
 
     int is_rgb;
+    int is_float32;
+    uint32_t max_pixels_per_slice;
     int ppi;
     int chunks;
 } VulkanEncodeFFv1Context;
@@ -141,6 +144,12 @@ extern const unsigned int ff_ffv1_enc_remap_comp_spv_len;
 extern const unsigned char ff_ffv1_enc_rgb_float_comp_spv_data[];
 extern const unsigned int ff_ffv1_enc_rgb_float_comp_spv_len;
 
+extern const unsigned char ff_ffv1_enc_rgb_float_golomb_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_rgb_float_golomb_comp_spv_len;
+
+extern const unsigned char ff_ffv1_enc_sort32_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_sort32_comp_spv_len;
+
 static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec,
                           AVFrame *enc_in, VkImageView *enc_in_views,
                           FFVkBuffer *slice_data_buf, uint32_t slice_data_size,
@@ -203,6 +212,37 @@ static int run_remap(AVCodecContext *avctx, 
FFVkExecContext *exec,
     return 0;
 }
 
+static int run_sort32(AVCodecContext *avctx, FFVkExecContext *exec,
+                      AVFrame *enc_in, VkImageView *enc_in_views,
+                      FFVkBuffer *units_buf, uint32_t units_size,
+                      FFv1ShaderParams *pd)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
+    FFVulkanFunctions *vk = &fv->s.vkfn;
+
+    /* Bind the source planes and the units buffer, then run one group per slice */
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->sort32,
+                                  enc_in, enc_in_views,
+                                  1, 1, /* matches src[] (set 1, binding 1) */
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->sort32,
+                                    1, 2, 0, /* matches fltmap_buf (binding 2) */
+                                    units_buf,
+                                    0, units_size*f->slice_count, /* all slices */
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->sort32);
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->sort32,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(FFv1ShaderParams), pd);
+
+    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+    return 0; /* nothing above is checked for failure */
+}
+
 static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
                                            FFVkExecContext *exec,
                                            const AVFrame *pict)
@@ -279,15 +319,19 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
     slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
 
     if (f->remap_mode) {
-        const AVPixFmtDescriptor *desc = 
av_pix_fmt_desc_get(fv->s.frames->sw_format);
-        remap_data_size = 4*(1 << desc->comp[0].depth)*sizeof(uint32_t);
+        if (fv->is_float32) {
+            /* Per (slice, plane): [units : max_pixels*2 uints] + [bitmap : 
max_pixels uints]. */
+            remap_data_size = 4*fv->max_pixels_per_slice*3*sizeof(uint32_t);
+        } else {
+            const AVPixFmtDescriptor *desc = 
av_pix_fmt_desc_get(fv->s.frames->sw_format);
+            remap_data_size = 4*(1 << desc->comp[0].depth)*sizeof(uint32_t);
+        }
 
         RET(ff_vk_get_pooled_buffer(&fv->s, &fv->remap_data_pool,
                                     &remap_data_ref,
                                     VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                     NULL, remap_data_size*f->slice_count,
                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
-
         remap_data_buf = (FFVkBuffer *)remap_data_ref->data;
     }
 
@@ -348,6 +392,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
         .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
                     !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1,
         .slice_size_max = out_data_buf->size / f->slice_count,
+        .max_pixels_per_slice = fv->max_pixels_per_slice,
     };
 
     for (int i = 0; i < f->quant_table_count; i++) {
@@ -420,8 +465,13 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
     }
 
     if (f->remap_mode) {
-        RET(run_remap(avctx, exec, src, src_views,
-                      remap_data_buf, remap_data_size, &pd));
+        if (fv->is_float32) {
+            RET(run_sort32(avctx, exec, src, src_views,
+                           remap_data_buf, remap_data_size, &pd));
+        } else {
+            RET(run_remap(avctx, exec, src, src_views,
+                          remap_data_buf, remap_data_size, &pd));
+        }
 
         /* Make sure the writes are visible to the setup shader */
         ff_vk_buf_barrier(buf_bar[nb_buf_bar++], remap_data_buf,
@@ -519,6 +569,14 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext 
*avctx,
                       COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
                       COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
                       0, slice_data_size*f->slice_count);
+
+    /* Setup writes the per-pixel compact_idx (or compact_idx-of-value)
+     * back into the remap buffer; the encode shader reads it. */
+    if (f->remap_mode)
+        ff_vk_buf_barrier(buf_bar[nb_buf_bar++], remap_data_buf,
+                          COMPUTE_SHADER_BIT, SHADER_READ_BIT, 
SHADER_WRITE_BIT,
+                          COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
+                          0, remap_data_size*f->slice_count);
     if (f->key_frame || fv->force_pcm)
         ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
                           COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
@@ -906,6 +964,54 @@ fail:
     return err;
 }
 
+static int init_sort32_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFVulkanShader *shd = &fv->sort32;
+
+    uint32_t wg_x = FFMIN(fv->max_pixels_per_slice, 256); /* local size x */
+    ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
+                      (uint32_t []) { wg_x, 1, 1 }, 0);
+
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    const FFVulkanDescriptorSetBinding desc_set_const[] = {
+        { /* rangecoder_buf */
+            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set_const, 1, 1, 0);
+
+    const FFVulkanDescriptorSetBinding desc_set[] = {
+        { /* slice_data_buf */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        { /* src: one storage image per plane of the software format */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .elems  = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+        },
+        { /* units: fltmap_buf in the shader ((val, ndx) pairs + bitmap) */
+            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0);
+
+    RET(ff_vk_shader_link(&fv->s, shd,
+                          ff_ffv1_enc_sort32_comp_spv_data,
+                          ff_ffv1_enc_sort32_comp_spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail: /* also reached on success, with err set by the last RET() */
+    return err;
+}
+
 static int init_remap_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
 {
     int err;
@@ -1105,9 +1211,14 @@ static int init_encode_shader(AVCodecContext *avctx, 
VkSpecializationInfo *sl)
                                     4 + fv->is_rgb + !!f->remap_mode, 0, 0);
 
     if (f->remap_mode) {
-        ff_vk_shader_link(&fv->s, shd,
-                          ff_ffv1_enc_rgb_float_comp_spv_data,
-                          ff_ffv1_enc_rgb_float_comp_spv_len, "main");
+        if (fv->ctx.ac == AC_GOLOMB_RICE)
+            ff_vk_shader_link(&fv->s, shd,
+                              ff_ffv1_enc_rgb_float_golomb_comp_spv_data,
+                              ff_ffv1_enc_rgb_float_golomb_comp_spv_len, 
"main");
+        else
+            ff_vk_shader_link(&fv->s, shd,
+                              ff_ffv1_enc_rgb_float_comp_spv_data,
+                              ff_ffv1_enc_rgb_float_comp_spv_len, "main");
     } else if (fv->ctx.ac == AC_GOLOMB_RICE) {
         if (fv->is_rgb)
             ff_vk_shader_link(&fv->s, shd,
@@ -1304,6 +1415,26 @@ static av_cold int 
vulkan_encode_ffv1_init(AVCodecContext *avctx)
     fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) 
&&
                  !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8);
 
+    fv->is_float32 = (avctx->sw_pix_fmt == AV_PIX_FMT_GBRPF32 ||
+                      avctx->sw_pix_fmt == AV_PIX_FMT_GBRAPF32);
+
+    if (fv->is_float32) {
+        /* Compute the worst-case slice geometry. With version >= 4 the slice
+         * boundaries are computed via slice_coord() which rounds up, so any
+         * single slice has at most ceil(width/num_h_slices) * 
ceil(height/num_v_slices)
+         * pixels. */
+        uint32_t mw = (avctx->width  + f->num_h_slices - 1) / f->num_h_slices;
+        uint32_t mh = (avctx->height + f->num_v_slices - 1) / f->num_v_slices;
+        /* Round up to next pow2 for bitonic sort */
+        uint32_t n = 1;
+        uint32_t pn = mw*mh;
+        while (n < pn)
+            n <<= 1;
+        if (n < 2)
+            n = 2;
+        fv->max_pixels_per_slice = n;
+    }
+
     /* Init rct search shader */
     fv->optimize_rct = fv->is_rgb && f->version >= 4 &&
                        !fv->force_pcm && fv->optimize_rct;
@@ -1325,7 +1456,10 @@ static av_cold int 
vulkan_encode_ffv1_init(AVCodecContext *avctx)
     }
 
     if (f->remap_mode) {
-        err = init_remap_shader(avctx, sl);
+        if (fv->is_float32)
+            err = init_sort32_shader(avctx, sl);
+        else
+            err = init_remap_shader(avctx, sl);
         if (err < 0)
             return err;
     }
@@ -1420,6 +1554,7 @@ static av_cold int 
vulkan_encode_ffv1_close(AVCodecContext *avctx)
     ff_vk_shader_free(&fv->s, &fv->reset);
     ff_vk_shader_free(&fv->s, &fv->setup);
     ff_vk_shader_free(&fv->s, &fv->remap);
+    ff_vk_shader_free(&fv->s, &fv->sort32);
     ff_vk_shader_free(&fv->s, &fv->rct_search);
 
     if (fv->exec_ctx_info) {
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index c6817967c7..f86931727d 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -13,7 +13,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += 
vulkan/ffv1_enc_setup.comp.spv.o \
                                       vulkan/ffv1_enc_rgb_golomb.comp.spv.o \
                                       vulkan/ffv1_enc_rct_search.comp.spv.o \
                                       vulkan/ffv1_enc_remap.comp.spv.o \
-                                      vulkan/ffv1_enc_rgb_float.comp.spv.o
+                                      vulkan/ffv1_enc_rgb_float.comp.spv.o \
+                                      
vulkan/ffv1_enc_rgb_float_golomb.comp.spv.o \
+                                      vulkan/ffv1_enc_sort32.comp.spv.o
 
 OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
                                       vulkan/ffv1_dec_reset.comp.spv.o \
diff --git a/libavcodec/vulkan/ffv1_common.glsl 
b/libavcodec/vulkan/ffv1_common.glsl
index 8580a0777f..3d3b6753c6 100644
--- a/libavcodec/vulkan/ffv1_common.glsl
+++ b/libavcodec/vulkan/ffv1_common.glsl
@@ -75,6 +75,7 @@ layout (push_constant, scalar) uniform pushConstants {
     ivec2 sar;
     int pic_mode;
     uint slice_size_max;
+    uint max_pixels_per_slice;
 };
 
 #include "rangecoder.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl 
b/libavcodec/vulkan/ffv1_enc.comp.glsl
index 90ce8293b9..1c30e91828 100644
--- a/libavcodec/vulkan/ffv1_enc.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc.comp.glsl
@@ -40,8 +40,8 @@ layout (set = 1, binding = 1, scalar) writeonly buffer 
slice_results_buf {
  * denormals before we get to look at them. */
 layout (set = 1, binding = 3) uniform uimage2D src[];
 #ifdef FLOAT
-layout (set = 1, binding = 5) readonly buffer fltmap_buf {
-    uint fltmap[][4][65536];
+layout (set = 1, binding = 5, scalar) readonly buffer fltmap_buf {
+    uint fltmap[];
 };
 #endif
 
@@ -239,11 +239,24 @@ ivec4 load_components(uint slice_idx, in SliceContext sc, 
ivec2 pos)
 {
     ivec4 pix;
 #ifdef FLOAT
-    /* Source view is r16_uint so imageLoad returns the raw fp16 bit pattern
-     * in .x; no conversion is performed and denormals survive. */
-    for (int i = 0; i < color_planes; i++) {
-        uint iv = imageLoad(src[i], pos)[0] & 0xFFFFu;
-        pix[i] = int(fltmap[slice_idx][i][iv]);
+    if (c_bits >= 32) {
+        /* 32-bit float: per-pixel-position bitmap lookup. The bitmap region
+         * follows the units region in the same buffer. */
+        ivec2 rel = pos - sc.slice_pos;
+        uint pixel_idx = uint(rel.x + sc.slice_dim.x*rel.y);
+        uint plane_stride = max_pixels_per_slice*3u;
+        for (int i = 0; i < color_planes; i++) {
+            uint base = (slice_idx*4u + uint(i))*plane_stride
+                        + max_pixels_per_slice*2u;
+            pix[i] = int(fltmap[base + pixel_idx]);
+        }
+    } else {
+        /* 16-bit float: value-indexed lookup. Source view is r16_uint so
+         * imageLoad returns the raw fp16 bit pattern in .x. */
+        for (int i = 0; i < color_planes; i++) {
+            uint iv = imageLoad(src[i], pos)[0] & 0xFFFFu;
+            pix[i] = int(fltmap[(slice_idx*4u + uint(i))*65536u + iv]);
+        }
     }
 #else
     pix = ivec4(imageLoad(src[0], pos));
diff --git a/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl
new file mode 100644
index 0000000000..e4535eb08f
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl
@@ -0,0 +1,33 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 4) uniform uimage2D tmp;
+
+#define PB_UNALIGNED
+#define GOLOMB
+#define FLOAT
+#define RGB
+#include "ffv1_enc.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
index 53a8d7f13f..e931019a43 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
@@ -23,13 +23,13 @@
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require
 
-#define NB_CONTEXTS 2
+#define NB_CONTEXTS 6
 #define FULL_RENORM
 #include "common.glsl"
 #include "ffv1_common.glsl"
 
-layout (set = 1, binding = 1) buffer fltmap_buf {
-    uint fltmap[][4][65536];
+layout (set = 1, binding = 1, scalar) buffer fltmap_buf {
+    uint fltmap[];
 };
 
 void init_slice(inout SliceContext sc, uint slice_idx)
@@ -81,6 +81,7 @@ void encode_histogram_remap(uint slice_idx, inout 
SliceContext sc)
     const int flip = (remap_mode == 2) ? 0x7FFF : 0;
 
     for (int p = 0; p < color_planes; p++) {
+        const uint base = (slice_idx*4u + uint(p))*65536u;
         uint j = 0;
         uint lu = 0;
         int run = 0;
@@ -90,15 +91,15 @@ void encode_histogram_remap(uint slice_idx, inout 
SliceContext sc)
 
         put_usymbol(0, 0);
 
-        for (int i = 0; i < NB_CONTEXTS; i++)
+        for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++)
             rc_state[i] = uint8_t(128);
 
         int cnt = 0;
         for (int i = 0; i < rct_offset; i++) {
             int ri = i ^ (((i & 0x8000) != 0) ? 0 : flip);
-            uint u = uint(fltmap[slice_idx][p][ri] != 0);
+            uint u = uint(fltmap[base + uint(ri)] != 0u);
 
-            fltmap[slice_idx][p][ri] = uint16_t(j);
+            fltmap[base + uint(ri)] = j;
             j += u;
 
             if (lu == u) {
@@ -117,6 +118,115 @@ void encode_histogram_remap(uint slice_idx, inout 
SliceContext sc)
     }
 }
 
+/* The 32-bit float remap uses 6 contexts: state[lu][category][bit] with
+ * lu = 0,1 and category = 0 (run/step-1), 1 (delta -- unused here), 2 (mul). 
*/
+#define CTX_F32(lu, cat) ((uint(lu)*3u + uint(cat))*CONTEXT_SIZE)
+
+void encode_float32_remap(uint slice_idx, inout SliceContext sc)
+{
+    const uint slice_w = uint(sc.slice_dim.x);
+    const uint slice_h = uint(sc.slice_dim.y);
+    const uint pixel_num = slice_w * slice_h;
+    const uint plane_stride = max_pixels_per_slice*3u;
+
+    for (int p = 0; p < color_planes; p++) {
+        /* Per (slice, plane): [units : max_pixels (val, ndx) pairs, 8 bytes
+         * each] then [bitmap : max_pixels uints]. Units are only read here;
+         * the bitmap receives each pixel's compact index, keyed by ndx. */
+        const uint plane_base = (slice_idx*4u + uint(p))*plane_stride;
+        const uint bitmap_base = plane_base + max_pixels_per_slice*2u;
+
+        for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++)
+            rc_state[i] = uint8_t(128);
+
+        put_usymbol(1, CTX_F32(0, 0)); /* NOTE(review): presumably mul_count = 1 */
+
+        for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++)
+            rc_state[i] = uint8_t(128);
+
+        /* last_val is the last unique value (or 0xFFFFFFFF as the "before
+         * any value" sentinel; this lets step = val - last_val give val+1
+         * for the first emission via unsigned wraparound). */
+        uint last_val = 0xFFFFFFFFu;
+        uint lu = 0;
+        uint run = 0;
+        int ci = -1;
+        bool emit_first_mul = true;
+
+        for (uint i = 0; i < pixel_num; i++) {
+            uint u_val = fltmap[plane_base + 2u*i + 0u];
+            uint u_ndx = fltmap[plane_base + 2u*i + 1u];
+
+            /* Same value as the previous unit: reuse ci and emit nothing. */
+            if (i > 0u && last_val == u_val) {
+                fltmap[bitmap_base + u_ndx] = uint(ci);
+                continue;
+            }
+
+            uint step = u_val - last_val;
+
+            if (lu == 0u) {
+                put_usymbol(step - 1u, CTX_F32(0, 0));
+
+                if (emit_first_mul) {
+                    put_usymbol(1, CTX_F32(0, 2));
+                    emit_first_mul = false;
+                }
+
+                last_val = u_val;
+                if (step == 1u) {
+                    lu = 1;
+                    run = 0;
+                }
+            } else {
+                if (step == 1u) {
+                    run++;
+                    last_val = u_val;
+                } else {
+                    if (run > 0u) {
+                        put_usymbol(run, CTX_F32(1, 0));
+                        put_usymbol(0, CTX_F32(1, 0));
+                        last_val += 2u;
+                    } else {
+                        put_usymbol(0, CTX_F32(1, 0));
+                        last_val += 1u;
+                    }
+                    lu = 0;
+                    run = 0;
+
+                    step = u_val - last_val;
+                    put_usymbol(step - 1u, CTX_F32(0, 0));
+
+                    last_val = u_val;
+                    if (step == 1u) {
+                        lu = 1;
+                        run = 0;
+                    }
+                }
+            }
+
+            ci++; /* one compact index per distinct value */
+            fltmap[bitmap_base + u_ndx] = uint(ci);
+        }
+
+        if (lu == 1u) { /* flush the run still pending after the last unit */
+            if (run > 0u) {
+                put_usymbol(run, CTX_F32(1, 0));
+                put_usymbol(0, CTX_F32(1, 0));
+                last_val += 2u;
+            } else {
+                put_usymbol(0, CTX_F32(1, 0));
+                last_val += 1u;
+            }
+        }
+
+        if (last_val != 0xFFFFFFFFu)
+            put_usymbol(0xFFFFFFFFu - last_val, CTX_F32(0, 0));
+
+        sc.remap_count[p] = ci + 1; /* number of distinct values in this plane */
+    }
+}
+
 void write_slice_header(uint slice_idx, inout SliceContext sc)
 {
     [[unroll]]
@@ -149,7 +259,10 @@ void write_slice_header(uint slice_idx, inout SliceContext 
sc)
 
         if (remap_mode != 0) {
             put_usymbol(remap_mode, 0);
-            encode_histogram_remap(slice_idx, sc);
+            if (c_bits >= 32)
+                encode_float32_remap(slice_idx, sc);
+            else
+                encode_histogram_remap(slice_idx, sc);
         }
     }
 }
diff --git a/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl 
b/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl
new file mode 100644
index 0000000000..872c7daa2b
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl
@@ -0,0 +1,155 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+
+#define SB_QUALI readonly
+#include "common.glsl"
+#include "ffv1_common.glsl"
+
+layout (set = 1, binding = 1) uniform uimage2D src[];
+
+layout (set = 1, binding = 2, scalar) buffer fltmap_buf {
+    uint fltmap[];
+};
+
+/* The shared fltmap_buf is laid out per (slice, plane) as a
+ * [max_pixels_per_slice*3] uint block, where the first
+ * [max_pixels_per_slice*2] entries hold interleaved (val, ndx) pairs and
+ * the trailing [max_pixels_per_slice] entries are the bitmap region used
+ * by the setup/encode shaders. Padding past pixel_num is the sentinel
+ * (UINT32_MAX, UINT32_MAX) so it sorts to the end. */
+
+/* Per-workgroup sort buffer (8192 pairs = 64 KiB; NOTE(review): verify against
+ * maxComputeSharedMemorySize). Larger slices are sorted in global memory. */
+shared u32vec2 smem[8192];
+
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + 
gl_WorkGroupID.x;
+    uvec2 img_size = imageSize(src[0]);
+
+    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
+                           gl_NumWorkGroups.x, 0);
+    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
+                           gl_NumWorkGroups.x, 0);
+    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
+                           gl_NumWorkGroups.y, 0);
+    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
+                           gl_NumWorkGroups.y, 0);
+
+    uint slice_w = sxe - sxs;
+    uint slice_h = sye - sys;
+    uint pixel_num = slice_w * slice_h;
+
+    /* Bitonic sort needs a pow2 length of at least 2; pad up to it */
+    uint N = 1;
+    while (N < pixel_num)
+        N <<= 1;
+    N = max(N, 2u);
+    if (N > max_pixels_per_slice)
+        N = max_pixels_per_slice;
+
+    const uint plane_stride = max_pixels_per_slice*3u;
+    const bool use_smem = N <= 8192u;
+
+    for (int p = 0; p < color_planes; p++) {
+        uint base = (slice_idx*4u + uint(p))*plane_stride;
+
+        /* Load (val, ndx) pairs; entries past pixel_num get the sentinel */
+        for (uint i = gl_LocalInvocationIndex; i < N;
+             i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) {
+            uint v, ndx;
+            if (i < pixel_num) {
+                uint y = i / slice_w;
+                uint x = i - y*slice_w;
+                /* Source is bound as r32ui (FF_VK_REP_NATIVE for r32_sfloat) 
so
+                 * imageLoad returns the raw bit pattern of the float. */
+                v = imageLoad(src[p], ivec2(sxs + x, sys + y))[0];
+                if (remap_mode == 2)
+                    v = ((v & 0x80000000u) != 0u) ? v : (v ^ 0x7FFFFFFFu);
+                ndx = i;
+            } else {
+                v = 0xFFFFFFFFu;
+                ndx = 0xFFFFFFFFu;
+            }
+            if (use_smem) {
+                smem[i] = u32vec2(v, ndx);
+            } else {
+                fltmap[base + 2u*i + 0u] = v;
+                fltmap[base + 2u*i + 1u] = ndx;
+            }
+        }
+        if (!use_smem) memoryBarrierBuffer(); /* barrier() only orders smem */
+        barrier();
+
+        /* Bitonic sort by val, ties broken by ndx (so the order is total) */
+        for (uint k = 2; k <= N; k <<= 1) {
+            for (uint j = k >> 1; j > 0; j >>= 1) {
+                for (uint i = gl_LocalInvocationIndex; i < N;
+                     i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) {
+                    uint partner = i ^ j;
+                    if (partner > i) {
+                        bool ascending = (i & k) == 0;
+                        u32vec2 a, b;
+                        if (use_smem) {
+                            a = smem[i];
+                            b = smem[partner];
+                        } else {
+                            a = u32vec2(fltmap[base + 2u*i + 0u],
+                                        fltmap[base + 2u*i + 1u]);
+                            b = u32vec2(fltmap[base + 2u*partner + 0u],
+                                        fltmap[base + 2u*partner + 1u]);
+                        }
+                        bool a_gt_b = (a.x > b.x) ||
+                                      (a.x == b.x && a.y > b.y);
+                        if (a_gt_b == ascending) {
+                            if (use_smem) {
+                                smem[i] = b;
+                                smem[partner] = a;
+                            } else {
+                                fltmap[base + 2u*i + 0u] = b.x;
+                                fltmap[base + 2u*i + 1u] = b.y;
+                                fltmap[base + 2u*partner + 0u] = a.x;
+                                fltmap[base + 2u*partner + 1u] = a.y;
+                            }
+                        }
+                    }
+                }
+                if (!use_smem) memoryBarrierBuffer(); /* publish this stage */
+                barrier();
+            }
+        }
+
+        /* Publish the sorted pairs to the units region of fltmap */
+        if (use_smem) {
+            for (uint i = gl_LocalInvocationIndex; i < N;
+                 i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) {
+                u32vec2 u = smem[i];
+                fltmap[base + 2u*i + 0u] = u.x;
+                fltmap[base + 2u*i + 1u] = u.y;
+            }
+            barrier(); /* smem is reused by the next plane */
+        }
+    }
+}
-- 
2.52.0


From 0dedba039b239e4f4590c05b755cacc05549cf88 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 17 Mar 2026 13:37:39 +0100
Subject: [PATCH 2/8] ffv1enc: implement Bayer pixel format encoding

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1.c    |   6 ++-
 libavcodec/ffv1.h    |   2 +
 libavcodec/ffv1enc.c | 110 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 812989a892..1dcb7be28d 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -126,6 +126,8 @@ int ff_need_new_slices(int width, int num_h_slices, int 
chroma_shift) {
 
 int ff_slice_coord(const FFV1Context *f, int width, int sx, int num_h_slices, 
int chroma_shift) {
     int mpw = 1<<chroma_shift;
+    if (f->bayer)
+        mpw = FFMAX(mpw, 2);
     int awidth = FFALIGN(width, mpw);
 
     if (f->combined_version <= 0x40002)
@@ -233,7 +235,7 @@ void ff_ffv1_compute_bits_per_plane(const FFV1Context *f, 
FFV1SliceContext *sc,
         av_assert0(bits_per_raw_sample > 8); //breaks with lbd, needs review 
if added
 
     //bits with no RCT
-    for (int p=0; p<3+f->transparency; p++) {
+    for (int p=0; p<3+f->transparency+f->bayer; p++) {
         bits[p] = av_ceil_log2(sc->remap_count[p]);
         if (mask)
             mask[p] = (1<<bits[p]) - 1;
@@ -246,6 +248,8 @@ void ff_ffv1_compute_bits_per_plane(const FFV1Context *f, 
FFV1SliceContext *sc,
         bits[0] = av_ceil_log2(FFMAX3(sc->remap_count[0], sc->remap_count[1], 
sc->remap_count[2]));
         bits[1] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[1]);
         bits[2] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[2]);
+        if (f->bayer)
+            bits[3] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[3]);
 
         //old version coded a bit more than needed
         if (f->combined_version < 0x40008) {
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 8a48e8e682..012b92ec21 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -153,6 +153,8 @@ typedef struct FFV1Context {
     int flt;
     int remap_mode;
     int remap_optimizer;
+    int bayer;
+    int bayer_order; /* 0 = RGGB (only supported value for now) */
     int maxsize_warned;
 
     int use32bit;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index cd346495f7..5e5974c035 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -435,7 +435,7 @@ static void set_micro_version(FFV1Context *f)
         if (f->version == 3) {
             f->micro_version = 4;
         } else if (f->version == 4) {
-            f->micro_version = 9;
+            f->micro_version = 10;
         } else
             av_assert0(0);
 
@@ -480,6 +480,8 @@ av_cold int ff_ffv1_write_extradata(AVCodecContext *avctx)
     put_symbol(&c, state, f->chroma_h_shift, 0);
     put_symbol(&c, state, f->chroma_v_shift, 0);
     put_rac(&c, state, f->transparency);
+    if (f->colorspace == 2)
+        put_symbol(&c, state, f->bayer_order, 0); /* 0 = RGGB */
     put_symbol(&c, state, f->num_h_slices - 1, 0);
     put_symbol(&c, state, f->num_v_slices - 1, 0);
 
@@ -566,9 +568,9 @@ static int sort_stt(FFV1Context *s, uint8_t stt[256])
 int ff_ffv1_encode_determine_slices(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
-    int plane_count = 1 + 2*s->chroma_planes + s->transparency;
-    int max_h_slices = AV_CEIL_RSHIFT(avctx->width , s->chroma_h_shift);
-    int max_v_slices = AV_CEIL_RSHIFT(avctx->height, s->chroma_v_shift);
+    int plane_count = 1 + 2*s->chroma_planes + s->bayer + s->transparency;
+    int max_h_slices = AV_CEIL_RSHIFT(avctx->width , s->bayer ? 1 : 
s->chroma_h_shift);
+    int max_v_slices = AV_CEIL_RSHIFT(avctx->height, s->bayer ? 1 : 
s->chroma_v_shift);
     s->num_v_slices = (avctx->width > 352 || avctx->height > 288 || 
!avctx->slices) ? 2 : 1;
     s->num_v_slices = FFMIN(s->num_v_slices, max_v_slices);
     for (; s->num_v_slices <= 32; s->num_v_slices++) {
@@ -694,6 +696,8 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx)
         s->plane_count = 2;
     if (!s->chroma_planes && s->version > 3)
         s->plane_count--;
+    if (s->bayer)
+        s->plane_count = 3;
 
     s->picture_number = 0;
 
@@ -804,6 +808,7 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext 
*avctx,
     FFV1Context *s = avctx->priv_data;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 
+    s->bayer = 0;
     s->plane_count = 3;
     switch(pix_fmt) {
     case AV_PIX_FMT_GRAY9:
@@ -911,6 +916,14 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext 
*avctx,
         s->use32bit = 1;
         s->version = FFMAX(s->version, 1);
         break;
+    case AV_PIX_FMT_BAYER_RGGB16:
+        s->colorspace = 2;
+        s->chroma_planes = 1;
+        s->bits_per_raw_sample = 16;
+        s->use32bit = 1;
+        s->version = FFMAX(s->version, 4);
+        s->bayer = 1;
+        break;
     case AV_PIX_FMT_GBRP:
     case AV_PIX_FMT_0RGB32:
         s->colorspace = 1;
@@ -1106,7 +1119,7 @@ static void encode_slice_header(FFV1Context *f, 
FFV1SliceContext *sc)
         if (sc->slice_coding_mode == 1)
             ff_ffv1_clear_slice_state(f, sc);
         put_symbol(c, state, sc->slice_coding_mode, 0);
-        if (sc->slice_coding_mode != 1 && f->colorspace == 1) {
+        if (sc->slice_coding_mode != 1 && f->colorspace != 0) {
             put_symbol(c, state, sc->slice_rct_by_coef, 0);
             put_symbol(c, state, sc->slice_rct_ry_coef, 0);
         }
@@ -1569,6 +1582,86 @@ static int encode_float32_rgb_frame(FFV1Context *f, 
FFV1SliceContext *sc,
     return 0;
 }
 
+static int encode_bayer_frame(FFV1Context *f, FFV1SliceContext *sc,
+                              const uint8_t *src[4],
+                              int w, int h, const int stride[4], int ac)
+{ /* Codes a w x h region of 16-bit Bayer samples read from src[0] as four planes holding one value per 2x2 block; returns 0 or encode_line's negative error. */
+    const int pass1 = !!(f->avctx->flags & AV_CODEC_FLAG_PASS1);
+    const int ring_size = f->context_model ? 3 : 2;
+    TYPE *sample[4][3];
+
+    int bits[4], offset;
+    ff_ffv1_compute_bits_per_plane(f, sc, bits, &offset, NULL, 
f->bits_per_raw_sample);
+
+    w >>= 1; /* from here on w counts 2x2 blocks, not individual samples */
+
+    sc->run_index = 0;
+
+    for (int p = 0; p < MAX_PLANES; ++p)
+        sample[p][2] = RENAME(sc->sample_buffer);
+
+    memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
+           (w + 6) * sizeof(*RENAME(sc->sample_buffer)));
+
+    for (int y = 0; y < h; y += 2) { /* one pass per pair of source rows; row y + 1 is read below, so presumably h is even - TODO confirm */
+        for (int i = 0; i < ring_size; i++)
+            for (int p = 0; p < MAX_PLANES; p++)
+                sample[p][i] = RENAME(sc->sample_buffer) + p*ring_size*(w+6) +
+                               ((h+i-y/2) % ring_size)*(w+6) + 3; /* each plane owns ring_size rows of (w + 6) entries; the + 3 leaves room for the [-1] access below */
+
+        for (int x = 0; x < w; x++) {
+            const uint16_t *l1 = ((const uint16_t*)(src[0] + stride[0]*(y + 0) 
+ x*2*2));
+            const uint16_t *l2 = ((const uint16_t*)(src[0] + stride[0]*(y + 1) 
+ x*2*2));
+
+            int r, gr, gb, b;
+            r  = l1[0]; /* top-left of the 2x2 block */
+            gr = l1[1]; /* top-right */
+            gb = l2[0]; /* bottom-left */
+            b  = l2[1]; /* bottom-right */
+
+            if (sc->slice_coding_mode != 1) { /* in mode 1 the four samples are stored untransformed */
+               /**
+                * Bayer 2x2 RCT, based on:
+                * "Reversible color transform for Bayer color filter array 
images", S. Poomrittigul et al,
+                * APSIPA Transactions on Signal and Information Processing 
(2013) 2 (1): 1-10,
+                * doi:10.1017/ATSIP.2013.6 */
+               int gd = gr - gb;
+               int gm = gb + (gd >> 1);
+
+                b -= gm;
+                r -= gm;
+                gm += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) 
>> 2;
+                b += offset;
+                r += offset;
+                gd += offset;
+
+                gr = gm; /* plane 0 then carries gm */
+                gb = gd; /* plane 1 then carries gd */
+            }
+
+            sample[0][0][x] = gr;
+            sample[1][0][x] = gb;
+            sample[2][0][x] = b;
+            sample[3][0][x] = r;
+        }
+
+        for (int p = 0; p < 4; p++) {
+            int ret;
+            sample[p][0][-1] = sample[p][1][0  ];
+            sample[p][1][ w] = sample[p][1][w-1];
+            /* Plane contexts: gm=0 (luma), b-gm/r-gm=1 (chroma diff from
+             * green), gd=2 (own context - green-green diff has different
+             * statistics from both luma and chroma). */
+            ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p],
+                                      p == 1 ? 2 : (p > 1),
+                                      bits[p], ac, pass1);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
 
 static int encode_slice(AVCodecContext *c, void *arg)
 {
@@ -1664,6 +1757,8 @@ retry:
         ret |= encode_plane(f, sc, p->data[0] + (ps>>1) + ps*x + 
y*p->linesize[0], width, height, p->linesize[0], 1, 1, 2, ac);
     } else if (f->bits_per_raw_sample == 32) {
         ret = encode_float32_rgb_frame(f, sc, planes, width, height, 
p->linesize, ac);
+    } else if (f->bayer) {
+        ret = encode_bayer_frame(f, sc, planes, width, height, p->linesize, 
ac);
     } else if (f->use32bit) {
         ret = encode_rgb_frame32(f, sc, planes, width, height, p->linesize, 
ac);
     } else {
@@ -1706,7 +1801,7 @@ size_t ff_ffv1_encode_buffer_size(AVCodecContext *avctx)
     if (f->version > 3) {
         maxsize *= f->bits_per_raw_sample + 1;
         if (f->remap_mode)
-            maxsize += f->slice_count * 70000 * (1 + 2*f->chroma_planes + 
f->transparency);
+            maxsize += f->slice_count * 70000 * (1 + 2*f->chroma_planes + 
f->bayer + f->transparency);
     } else {
         maxsize += f->slice_count * 2 * (avctx->width + avctx->height); //for 
bug with slices that code some pixels more than once
         maxsize *= 8*(2*f->bits_per_raw_sample + 5);
@@ -1957,7 +2052,8 @@ const FFCodec ff_ffv1_encoder = {
         AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12,
         AV_PIX_FMT_YAF16,
         AV_PIX_FMT_GRAYF16,
-        AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32),
+        AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32,
+        AV_PIX_FMT_BAYER_RGGB16),
     .color_ranges   = AVCOL_RANGE_MPEG,
     .p.priv_class   = &ffv1_class,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH,
-- 
2.52.0


From 0128e4b0f0f6cc3ed6d366dbe501c1c8be32feac Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 17 Mar 2026 14:05:51 +0100
Subject: [PATCH 3/8] ffv1dec: implement Bayer pixel format decoding

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1_parse.c | 15 ++++++++-
 libavcodec/ffv1dec.c    | 75 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ffv1_parse.c b/libavcodec/ffv1_parse.c
index 10f3652ff5..f53b5c8e84 100644
--- a/libavcodec/ffv1_parse.c
+++ b/libavcodec/ffv1_parse.c
@@ -117,7 +117,15 @@ int ff_ffv1_read_extra_header(FFV1Context *f)
     f->chroma_h_shift             = ff_ffv1_get_symbol(&c, state, 0);
     f->chroma_v_shift             = ff_ffv1_get_symbol(&c, state, 0);
     f->transparency               = get_rac(&c, state);
-    f->plane_count                = 1 + (f->chroma_planes || f->version<4) + 
f->transparency;
+    f->bayer                      = (f->colorspace == 2);
+    if (f->bayer) {
+        f->bayer_order            = ff_ffv1_get_symbol(&c, state, 0);
+        if (f->bayer_order != 0) {
+            av_log(f->avctx, AV_LOG_ERROR, "Unsupported bayer order %d\n", 
f->bayer_order);
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+    f->plane_count                = 1 + (f->chroma_planes || f->version<4) + 
f->transparency + f->bayer;
     f->num_h_slices               = 1 + ff_ffv1_get_symbol(&c, state, 0);
     f->num_v_slices               = 1 + ff_ffv1_get_symbol(&c, state, 0);
 
@@ -430,6 +438,11 @@ int ff_ffv1_parse_header(FFV1Context *f, RangeCoder *c, 
uint8_t *state)
             }
             f->use32bit = 1;
         }
+    } else if (f->colorspace == 2) {
+        if (f->avctx->bits_per_raw_sample == 16) {
+            f->pix_fmt = AV_PIX_FMT_BAYER_RGGB16;
+            f->use32bit = 1;
+        }
     } else {
         av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");
         return AVERROR(ENOSYS);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 54fe769fca..6677e5c8b4 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -249,7 +249,7 @@ static int decode_slice_header(const FFV1Context *f,
     if (f->version > 3) {
         sc->slice_reset_contexts = get_rac(c, state);
         sc->slice_coding_mode = ff_ffv1_get_symbol(c, state, 0);
-        if (sc->slice_coding_mode != 1 && f->colorspace == 1) {
+        if (sc->slice_coding_mode != 1 && f->colorspace != 0) {
             sc->slice_rct_by_coef = ff_ffv1_get_symbol(c, state, 0);
             sc->slice_rct_ry_coef = ff_ffv1_get_symbol(c, state, 0);
             if ((uint64_t)sc->slice_rct_by_coef + 
(uint64_t)sc->slice_rct_ry_coef > 4) {
@@ -374,6 +374,76 @@ static int decode_remap(FFV1Context *f, FFV1SliceContext 
*sc)
     return 0;
 }
 
+static int decode_bayer_frame(FFV1Context *f, FFV1SliceContext *sc,
+                              GetBitContext *gb,
+                              uint8_t *src, int w, int h, int stride)
+{ /* Decodes four planes of per-2x2-block values and writes them to src as 16-bit Bayer samples; returns 0 or decode_line's negative error. */
+    int x, y, p;
+    TYPE *sample[4][2];
+    int ac = f->ac;
+    unsigned mask[4];
+
+    int bits[4], offset;
+    ff_ffv1_compute_bits_per_plane(f, sc, bits, &offset, mask, 
f->avctx->bits_per_raw_sample);
+
+    w >>= 1; /* from here on w counts 2x2 blocks, not individual samples */
+
+    if (sc->slice_coding_mode == 1)
+        ac = 1;
+
+    for (x = 0; x < 4; x++) { /* x indexes the plane here: two rows of (w + 6) entries per plane */
+        sample[x][0] = RENAME(sc->sample_buffer) +  x * 2      * (w + 6) + 3;
+        sample[x][1] = RENAME(sc->sample_buffer) + (x * 2 + 1) * (w + 6) + 3;
+    }
+
+    sc->run_index = 0;
+
+    memset(RENAME(sc->sample_buffer), 0, 8 * (w + 6) * 
sizeof(*RENAME(sc->sample_buffer)));
+
+    for (y = 0; y < h; y += 2) { /* one pass per pair of output rows; row y + 1 is written below, so presumably h is even - TODO confirm */
+        for (p = 0; p < 4; p++) {
+            int ret;
+            TYPE *temp = sample[p][0]; // FIXME: try a normal buffer
+
+            sample[p][0] = sample[p][1]; /* swap the two rows: [1] is the row read back after decode_line, [0] the one before it */
+            sample[p][1] = temp;
+
+            sample[p][1][-1]= sample[p][0][0  ];
+            sample[p][0][ w]= sample[p][0][w-1];
+            ret = RENAME(decode_line)(f, sc, gb, w, sample[p],
+                                      p == 1 ? 2 : (p > 1), bits[p], ac); /* context index: plane 0 -> 0, plane 1 -> 2, planes 2 and 3 -> 1 */
+            if (ret < 0)
+                return ret;
+        }
+
+        for (x = 0; x < w; x++) {
+            int g_r = sample[0][1][x];
+            int g_b = sample[1][1][x];
+            int b = sample[2][1][x];
+            int r = sample[3][1][x];
+
+            if (sc->slice_coding_mode != 1) { /* in mode 1 the planes already hold the final sample values */
+                b -= offset;
+                r -= offset;
+                g_r -= (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) 
>> 2;
+                b += g_r;
+                r += g_r;
+
+               /* Recover green pair: encoder stored gm = gb + (gd >> 1), gd = 
gr - gb */
+               int gd = g_b - offset;
+               g_b = g_r - (gd >> 1);
+               g_r = g_b + gd;
+            }
+
+            *((uint16_t*)(src + (x*2 + 0)*2 + stride*(y + 0))) = r;   /* top-left of the 2x2 block */
+            *((uint16_t*)(src + (x*2 + 1)*2 + stride*(y + 0))) = g_r; /* top-right */
+            *((uint16_t*)(src + (x*2 + 0)*2 + stride*(y + 1))) = g_b; /* bottom-left */
+            *((uint16_t*)(src + (x*2 + 1)*2 + stride*(y + 1))) = b;   /* bottom-right */
+        }
+    }
+    return 0;
+}
+
 static int decode_slice(AVCodecContext *c, void *arg)
 {
     FFV1Context *f    = c->priv_data;
@@ -449,6 +519,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
     } else if (f->colorspace == 0) {
          decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0]         
 , width, height, p->linesize[0], 0, 0, 2, ac);
          decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0] + 
(ps>>1), width, height, p->linesize[0], 1, 1, 2, ac);
+    } else if (f->bayer) {
+        decode_bayer_frame(f, sc, &gb, p->data[0] + ps * x + y * 
p->linesize[0],
+                           width, height, p->linesize[0]);
     } else if (f->use32bit) {
         uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
-- 
2.52.0


From 4d73d08274d7446c9ca4c9a50c852ab567c73d53 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 26 May 2026 17:26:18 +0900
Subject: [PATCH 4/8] ffv1enc: add RCT coefficient search for Bayer

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1enc.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 5e5974c035..d31a2c19ed 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -1222,6 +1222,69 @@ static void choose_rct_params(const FFV1Context *f, 
FFV1SliceContext *sc,
     sc->slice_rct_ry_coef = rct_y_coeff[best][0];
 }
 
+static void choose_rct_params_bayer(const FFV1Context *f, FFV1SliceContext *sc,
+                                    const uint8_t *src[4], const int stride[4],
+                                    int w, int h)
+{ /* Scores every candidate coefficient pair over the region and stores the lowest-scoring one in sc->slice_rct_by_coef / sc->slice_rct_ry_coef. */
+    static const int rct_y_coeff[NB_Y_COEFF][2] = {
+        { 0, 0 }, { 1, 1 }, { 2, 2 }, { 0, 2 }, { 2, 0 }, { 4, 0 }, { 0, 4 }, 
{ 0, 3 },
+        { 3, 0 }, { 3, 1 }, { 1, 3 }, { 1, 2 }, { 2, 1 }, { 0, 1 }, { 1, 0 },
+    };
+    int stat[NB_Y_COEFF] = {0};
+    int16_t *sample[3]; /* NOTE(review): differences of 16-bit samples are stored here and may not fit in int16_t - confirm this is acceptable for the heuristic */
+    int i, best;
+
+    /* Walk in 2x2 blocks, build per-block gm/b/r, evaluate prediction-error */
+    w >>= 1; /* from here on w counts 2x2 blocks, not individual samples */
+    for (i = 0; i < 3; i++)
+        sample[i] = sc->sample_buffer + i*w;
+
+    for (int y = 0; y < h; y += 2) { /* row y + 1 is read below, so presumably h is even - TODO confirm */
+        int last_gm = 0, last_b = 0, last_r = 0;
+        for (int x = 0; x < w; x++) {
+            const uint16_t *l1 = (const uint16_t *)(src[0] + stride[0]*(y + 0) 
+ x*2*2);
+            const uint16_t *l2 = (const uint16_t *)(src[0] + stride[0]*(y + 1) 
+ x*2*2);
+            int r  = l1[0];
+            int gr = l1[1];
+            int gb = l2[0];
+            int b  = l2[1];
+            int gd = gr - gb;
+            int gm = gb + (gd >> 1);
+
+            int agm = gm - last_gm; /* difference to the block on the left */
+            int ab  = b  - last_b;
+            int ar  = r  - last_r;
+
+            if (x && y) { /* the first column and the first row pair are not scored */
+                int bgm = agm - sample[0][x]; /* sample[][x] still holds the value stored for the row pair above */
+                int bb  = ab  - sample[1][x];
+                int br  = ar  - sample[2][x];
+
+                br -= bgm;
+                bb -= bgm;
+
+                for (i = 0; i < NB_Y_COEFF; i++)
+                    stat[i] += FFABS(bgm + ((br*rct_y_coeff[i][0] + 
bb*rct_y_coeff[i][1]) >> 2));
+            }
+            sample[0][x] = agm;
+            sample[1][x] = ab;
+            sample[2][x] = ar;
+
+            last_gm = gm;
+            last_b  = b;
+            last_r  = r;
+        }
+    }
+
+    best = 0; /* pick the candidate with the smallest accumulated score; ties keep the earliest entry */
+    for (i = 1; i < NB_Y_COEFF; i++)
+        if (stat[i] < stat[best])
+            best = i;
+
+    sc->slice_rct_by_coef = rct_y_coeff[best][1]; /* column 1 weighted the b term above */
+    sc->slice_rct_ry_coef = rct_y_coeff[best][0]; /* column 0 weighted the r term above */
+}
+
 static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc)
 {
     int len = 1 << f->bits_per_raw_sample;
@@ -1686,6 +1749,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
     sc->slice_coding_mode = 0;
     if (f->version > 3 && f->colorspace == 1) {
         choose_rct_params(f, sc, planes, p->linesize, width, height);
+    } else if (f->bayer) {
+        choose_rct_params_bayer(f, sc, planes, p->linesize, width, height);
     } else {
         sc->slice_rct_by_coef = 1;
         sc->slice_rct_ry_coef = 1;
-- 
2.52.0


From f3e8fba2a364569b606267f0862679c7eed34a72 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 27 May 2026 05:03:53 +0900
Subject: [PATCH 5/8] ffv1enc: write f->flt to extradata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parser has been reading f->flt for combined_version >= 0x40004
since commit c1b330bf24 (avcodec/ffv1: Basic float16 support), but
ff_ffv1_write_extradata() never had a matching put_symbol().
The result was that the parsed f->flt was whatever the next symbol's
worth of rangecoded bits happened to decode to — often 0, but for a
yuv420p16le -level 4 -strict experimental stream produced locally it
parses as 1.  The software decoder doesn't notice because the YUV
pixfmt-selection branches never check f->flt, but anything else that
trusts it gets garbage.

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1enc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index d31a2c19ed..eb55d3ba32 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -506,6 +506,8 @@ av_cold int ff_ffv1_write_extradata(AVCodecContext *avctx)
     if (f->version > 2) {
         put_symbol(&c, state, f->ec, 0);
         put_symbol(&c, state, f->intra = (f->avctx->gop_size < 2), 0);
+        if (f->combined_version >= 0x40004)
+            put_symbol(&c, state, f->flt, 0);
     }
 
     f->avctx->extradata_size = ff_rac_terminate(&c, 0);
-- 
2.52.0


From 8f3e8128c4660a0c5cc1c7060564ac5468b13cc7 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 27 May 2026 04:02:37 +0900
Subject: [PATCH 6/8] vulkan_ffv1: add Bayer decoder

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/ffv1_vulkan.c                      |  9 ++
 libavcodec/vulkan/Makefile                    |  4 +-
 libavcodec/vulkan/ffv1_common.glsl            |  2 +
 libavcodec/vulkan/ffv1_dec.comp.glsl          | 82 ++++++++++++++++++-
 libavcodec/vulkan/ffv1_dec_bayer.comp.glsl    | 31 +++++++
 .../vulkan/ffv1_dec_bayer_golomb.comp.glsl    | 32 ++++++++
 libavcodec/vulkan/ffv1_dec_setup.comp.glsl    |  2 +-
 libavcodec/vulkan/ffv1_vlc.glsl               |  2 +-
 libavcodec/vulkan_ffv1.c                      | 33 ++++++--
 9 files changed, 184 insertions(+), 13 deletions(-)
 create mode 100644 libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
 create mode 100644 libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl

diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c
index 73c2b2a7ce..81843b1701 100644
--- a/libavcodec/ffv1_vulkan.c
+++ b/libavcodec/ffv1_vulkan.c
@@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, 
FFV1Context *f,
     }
 
     int bits = desc->comp[0].depth;
+    /* Bayer pixfmts report misleading per-component depth in comp[0].depth
+     * (it counts the fraction of bits each component contributes per output
+     * pixel, not the per-sample bit width). Use bits_per_raw_sample. The
+     * encoder fills f->bits_per_raw_sample directly; the decoder only
+     * fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field
+     * with the avctx field as a fallback so this works from both sides. */
+    if (f->bayer)
+        bits = f->bits_per_raw_sample ? f->bits_per_raw_sample
+                                      : f->avctx->bits_per_raw_sample;
     SPEC_LIST_ADD(sl,  5, 32, (uint32_t)(1ULL << bits));
     SPEC_LIST_ADD(sl,  6, 32, f->colorspace);
     SPEC_LIST_ADD(sl,  7, 32, f->transparency);
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index f86931727d..0425548978 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += 
vulkan/ffv1_dec_setup.comp.spv.o \
                                       vulkan/ffv1_dec_rgb.comp.spv.o \
                                       vulkan/ffv1_dec_rgb_golomb.comp.spv.o \
                                       vulkan/ffv1_dec_rgb_float.comp.spv.o \
-                                      
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o
+                                      
vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \
+                                      vulkan/ffv1_dec_bayer.comp.spv.o \
+                                      vulkan/ffv1_dec_bayer_golomb.comp.spv.o
 
 OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += 
vulkan/prores_ks_alpha_data.comp.spv.o \
                                            
vulkan/prores_ks_slice_data.comp.spv.o \
diff --git a/libavcodec/vulkan/ffv1_common.glsl 
b/libavcodec/vulkan/ffv1_common.glsl
index 3d3b6753c6..36bce88a4a 100644
--- a/libavcodec/vulkan/ffv1_common.glsl
+++ b/libavcodec/vulkan/ffv1_common.glsl
@@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer 
slice_ctx_buf {
 uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
 {
     uint mpw = 1 << chroma_shift;
+    if (colorspace == 2)
+        mpw = max(mpw, 2u);
     uint awidth = align(width, mpw);
 
     if ((version < 4) || ((version == 4) && (micro_version < 3)))
diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl 
b/libavcodec/vulkan/ffv1_dec.comp.glsl
index 82835e8f92..2527f988f2 100644
--- a/libavcodec/vulkan/ffv1_dec.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec.comp.glsl
@@ -247,6 +247,48 @@ void decode_line(ivec2 sp, int w,
 }
 #endif
 
+#ifdef BAYER
+void writeout_bayer(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y)
+{ /* Reads row y of the four dec[] planes, undoes the colour transform unless the slice uses coding mode 1, and stores each 2x2 block to dst[0]. */
+    memoryBarrierImage(); /* presumably makes the dec[] stores issued by the line decoding visible before they are loaded below - confirm */
+    barrier();
+
+    int offset = rct_offset;
+
+    for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { /* each invocation handles every gl_WorkGroupSize.x-th block of the row */
+        ivec2 lpos = sp + LADDR(ivec2(x, y));
+        ivec2 pos  = sc.slice_pos + ivec2(int(x) << 1, y << 1); /* top-left sample of the 2x2 block in dst[0] */
+
+        /* Plane order set by encoder (Variant A):
+         *   dec[0]=gm (or gr in PCM), dec[1]=gd (or gb in PCM),
+         *   dec[2]=b,                 dec[3]=r */
+        int g_r = int(imageLoad(dec[0], lpos)[0]);
+        int g_b = int(imageLoad(dec[1], lpos)[0]);
+        int b   = int(imageLoad(dec[2], lpos)[0]);
+        int r   = int(imageLoad(dec[3], lpos)[0]);
+
+        if (sc.slice_coding_mode != 1) {
+            b -= offset;
+            r -= offset;
+            g_r -= (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2;
+            b += g_r;
+            r += g_r;
+
+            /* Recover green pair: encoder stored gm = gb + (gd >> 1),
+             * gd = gr - gb (with +offset on gd). */
+            int gd = g_b - offset;
+            g_b = g_r - (gd >> 1);
+            g_r = g_b + gd;
+        }
+
+        imageStore(dst[0], pos + ivec2(0, 0), uvec4(r));   /* top-left */
+        imageStore(dst[0], pos + ivec2(1, 0), uvec4(g_r)); /* top-right */
+        imageStore(dst[0], pos + ivec2(0, 1), uvec4(g_b)); /* bottom-left */
+        imageStore(dst[0], pos + ivec2(1, 1), uvec4(b));   /* bottom-right */
+    }
+}
+#endif
+
 #ifdef RGB
 ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset)
 {
@@ -319,14 +361,32 @@ void decode_slice(in SliceContext sc, uint slice_idx)
     ivec2 sp = sc.slice_pos;
     u16vec4 bits = get_slice_bits(sc);
 
-#ifdef RGB
+#ifdef BAYER
+    /* Bayer logical dims: 2x2 blocks at half resolution */
+    w >>= 1;
+    int bayer_h = sc.slice_dim.y >> 1;
+    sp.x >>= 1;
+    sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
+    /* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use
+     * raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. */
+    if (sc.slice_coding_mode == 0)
+        bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits);
+    else
+        bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1);
+#elif defined(RGB)
     sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
 #endif
 
 #ifndef GOLOMB
     /* PCM coding */
     if (sc.slice_coding_mode == 1) {
-#ifdef RGB
+#ifdef BAYER
+        for (int y = 0; y < bayer_h; y++) {
+            for (int p = 0; p < 4; p++)
+                decode_line_pcm(sp, w, y, p);
+            writeout_bayer(slice_idx, sc, sp, w, y);
+        }
+#elif defined(RGB)
         for (int y = 0; y < sc.slice_dim.y; y++) {
             for (int p = 0; p < color_planes; p++)
                 decode_line_pcm(sp, w, y, p);
@@ -347,16 +407,32 @@ void decode_slice(in SliceContext sc, uint slice_idx)
     }
 #endif
 
+#ifdef BAYER
+    /* Variant A plane-context mapping: gm=0, gd=2, b-gm=1, r-gm=1 */
+    u8vec4 quant_table_idx = sc.quant_table_idx.xzyy;
+    u32vec4 slice_state_off = (slice_idx*codec_planes +
+                               uvec4(0, 2, 1, 1))*plane_state_size;
+#else
     u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
     u32vec4 slice_state_off = (slice_idx*codec_planes +
                                uvec4(0, 1, 1, 2))*plane_state_size;
+#endif
 
 #ifdef GOLOMB
     slice_state_off >>= 3; // division by VLC_STATE_SIZE
     golomb_init();
 #endif
 
-#ifdef RGB
+#ifdef BAYER
+    int run_index = 0;
+    for (int y = 0; y < bayer_h; y++) {
+        for (int p = 0; p < 4; p++)
+            decode_line(sp, w, y, p, bits[p],
+                        slice_state_off[p], quant_table_idx[p], run_index);
+
+        writeout_bayer(slice_idx, sc, sp, w, y);
+    }
+#elif defined(RGB)
     int run_index = 0;
     for (int y = 0; y < sc.slice_dim.y; y++) {
         for (int p = 0; p < color_planes; p++)
diff --git a/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl 
b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
new file mode 100644
index 0000000000..6ceb15a35a
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl
@@ -0,0 +1,31 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
+
+#define RGB
+#define BAYER
+#include "ffv1_dec.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl 
b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl
new file mode 100644
index 0000000000..ca8b7bada0
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl
@@ -0,0 +1,32 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 5) writeonly uniform uimage2D dst[];
+
+#define RGB
+#define BAYER
+#define GOLOMB
+#include "ffv1_dec.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl 
b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
index ff57c57dc3..d000116012 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl
@@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext 
sc)
     if (version >= 4) {
         sc.slice_reset_contexts = get_rac(rc_state[0]);
         sc.slice_coding_mode = get_usymbol(0);
-        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+        if (sc.slice_coding_mode != 1 && colorspace != 0) {
             sc.slice_rct_coef.g = int(get_usymbol(0));
             sc.slice_rct_coef.r = int(get_usymbol(0));
             if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4)
diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl
index f362d3afbb..51f3d7ddae 100644
--- a/libavcodec/vulkan/ffv1_vlc.glsl
+++ b/libavcodec/vulkan/ffv1_vlc.glsl
@@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v)
     int drift = state.drift;
     int count = state.count;
     int bias = state.bias;
-    state.error_sum += uint16_t(abs(v));
+    state.error_sum += uint32_t(abs(v));
     drift           += v;
 
     if (count == 128) { // FIXME: variable
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 4056f3958a..e8dafb2505 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len;
 extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[];
 extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len;
 
+extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[];
+extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len;
+
+extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[];
+extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len;
+
 const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
     .codec_id         = AV_CODEC_ID_FFV1,
     .queue_flags      = VK_QUEUE_COMPUTE_BIT,
@@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
         nb_img_bar = 0;
         nb_buf_bar = 0;
 
-        for (int i = 0; i < color_planes; i++)
+        /* The intermediate frame has 4 planes (GBRAP16/32). Clear all of
+         * them since the bayer decoder uses all four. */
+        int n_dec_planes = f->bayer ? 4 : color_planes;
+        for (int i = 0; i < n_dec_planes; i++)
             vk->CmdClearColorImage(exec->buf, vkf->img[i], 
VK_IMAGE_LAYOUT_GENERAL,
                                    &((VkClearColorValue) { 0 }),
                                    1, &((VkImageSubresourceRange) {
@@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                                       1, 5,
                                       VK_IMAGE_LAYOUT_GENERAL,
                                       VK_NULL_HANDLE);
-    if (fltmap_buf)
+    if (fltmap_buf && !f->bayer)
         ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                         1, 6, 0,
                                         fltmap_buf,
@@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
                               FFVkExecPool *pool, FFVulkanShader *shd,
                               AVHWFramesContext *dec_frames_ctx,
                               AVHWFramesContext *out_frames_ctx,
-                              VkSpecializationInfo *sl, int ac, int rgb)
+                              VkSpecializationInfo *sl, int ac, int rgb,
+                              int bayer)
 {
     int err;
 
@@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
         },
     };
     ff_vk_shader_add_descriptor_set(s, shd, desc_set,
-                                    5 + rgb + (f->micro_version >= 9),
+                                    5 + rgb + (!bayer && f->micro_version >= 9),
                                     0, 0);
 
-    if (f->version >=4 && f->micro_version >= 9) {
+    if (bayer) {
+        if (ac == AC_GOLOMB_RICE)
+            ff_vk_shader_link(s, shd,
+                              ff_ffv1_dec_bayer_golomb_comp_spv_data,
+                              ff_ffv1_dec_bayer_golomb_comp_spv_len, "main");
+        else
+            ff_vk_shader_link(s, shd,
+                              ff_ffv1_dec_bayer_comp_spv_data,
+                              ff_ffv1_dec_bayer_comp_spv_len, "main");
+    } else if (f->version >=4 && f->micro_version >= 9) {
         if (ac == AC_GOLOMB_RICE)
             ff_vk_shader_link(s, shd,
                               ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
@@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
     FFv1VulkanDecodeContext *fv;
 
     if (f->version < 3 ||
-        (f->version == 4 && f->micro_version >= 10))
+        (f->version == 4 && f->micro_version >= 10 && !f->bayer))
         return AVERROR(ENOTSUP);
 
     /* Streams with a low amount of slices will usually be much slower
@@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
 
     /* Decode shaders */
     RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
-                           dctx, hwfc, sl, f->ac, is_rgb));
+                           dctx, hwfc, sl, f->ac, is_rgb, f->bayer));
 
     /* Init static data */
     RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f));
-- 
2.52.0


From 7a96478f23f670cd117b04029fd3aebd93a4e65d Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 27 May 2026 04:21:53 +0900
Subject: [PATCH 7/8] vulkan_ffv1: detect float remap from pixfmt, not f->flt

The decode-shader picker fell over for integer remapped streams.

Sponsored-by: Sovereign Tech Fund
---
 libavcodec/vulkan_ffv1.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index e8dafb2505..ce2e392233 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -169,8 +169,9 @@ static int vk_ffv1_start_frame(AVCodecContext          *avctx,
     if (err < 0)
         return err;
 
-    /* Allocate slice offsets/status buffer */
-    if (f->version >=4 && f->micro_version >= 9) {
+    /* Allocate slice offsets/status buffer (note, for integer+remap, we don't need it) */
+    if (f->version >=4 && f->micro_version >= 9 &&
+        (av_pix_fmt_desc_get(sw_format)->flags & AV_PIX_FMT_FLAG_FLOAT)) {
         err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_fltmap_pool,
                                       &fp->slice_fltmap_buf,
                                       VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
@@ -528,7 +529,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                                       1, 5,
                                       VK_IMAGE_LAYOUT_GENERAL,
                                       VK_NULL_HANDLE);
-    if (fltmap_buf && !f->bayer)
+    if (fltmap_buf)
         ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                         1, 6, 0,
                                         fltmap_buf,
@@ -716,8 +717,16 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
             .stages = VK_SHADER_STAGE_COMPUTE_BIT,
         },
     };
+    /* Detect a float output from the pixfmt descriptor instead of f->flt:
+     * the encoder side does not (yet) write f->flt to the extradata, so the
+     * parsed value is unreliable for some v4m4+ streams. The descriptor's
+     * FLOAT flag is set by the pixfmt selection logic and is accurate */
+    int is_float = !!(av_pix_fmt_desc_get(out_frames_ctx->sw_format)->flags &
+                      AV_PIX_FMT_FLAG_FLOAT);
+
+    /* Bindings 5 (dst) and 6 (fltmap_buf) are conditional */
     ff_vk_shader_add_descriptor_set(s, shd, desc_set,
-                                    5 + rgb + (!bayer && f->micro_version >= 9),
+                                    5 + rgb + (is_float && !bayer),
                                     0, 0);
 
     if (bayer) {
@@ -729,7 +738,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
             ff_vk_shader_link(s, shd,
                               ff_ffv1_dec_bayer_comp_spv_data,
                               ff_ffv1_dec_bayer_comp_spv_len, "main");
-    } else if (f->version >=4 && f->micro_version >= 9) {
+    } else if (is_float) {
         if (ac == AC_GOLOMB_RICE)
             ff_vk_shader_link(s, shd,
                               ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
@@ -827,8 +836,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
     FFVulkanDecodeShared *ctx = NULL;
     FFv1VulkanDecodeContext *fv;
 
-    if (f->version < 3 ||
-        (f->version == 4 && f->micro_version >= 10 && !f->bayer))
+    if (f->version < 3)
         return AVERROR(ENOTSUP);
 
     /* Streams with a low amount of slices will usually be much slower
-- 
2.52.0


From 438fe7d7d885c1e72a8690eaff2478b5a0ea9224 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Fri, 29 May 2026 04:11:45 +0900
Subject: [PATCH 8/8] vulkan_ffv1: add Bayer encoder

---
 libavcodec/ffv1enc_vulkan.c                   |  23 +++-
 libavcodec/vulkan/Makefile                    |   4 +-
 libavcodec/vulkan/ffv1_enc.comp.glsl          | 118 +++++++++++++++---
 libavcodec/vulkan/ffv1_enc_bayer.comp.glsl    |  31 +++++
 .../vulkan/ffv1_enc_bayer_golomb.comp.glsl    |  32 +++++
 libavcodec/vulkan/ffv1_enc_setup.comp.glsl    |  14 ++-
 6 files changed, 191 insertions(+), 31 deletions(-)
 create mode 100644 libavcodec/vulkan/ffv1_enc_bayer.comp.glsl
 create mode 100644 libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl

diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 7c22ced785..40e758f093 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -150,6 +150,12 @@ extern const unsigned int ff_ffv1_enc_rgb_float_golomb_comp_spv_len;
 extern const unsigned char ff_ffv1_enc_sort32_comp_spv_data[];
 extern const unsigned int ff_ffv1_enc_sort32_comp_spv_len;
 
+extern const unsigned char ff_ffv1_enc_bayer_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_bayer_comp_spv_len;
+
+extern const unsigned char ff_ffv1_enc_bayer_golomb_comp_spv_data[];
+extern const unsigned int ff_ffv1_enc_bayer_golomb_comp_spv_len;
+
 static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec,
                           AVFrame *enc_in, VkImageView *enc_in_views,
                           FFVkBuffer *slice_data_buf, uint32_t slice_data_size,
@@ -1210,7 +1216,16 @@ static int init_encode_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
     ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set,
                                     4 + fv->is_rgb + !!f->remap_mode, 0, 0);
 
-    if (f->remap_mode) {
+    if (f->bayer) {
+        if (fv->ctx.ac == AC_GOLOMB_RICE)
+            ff_vk_shader_link(&fv->s, shd,
+                              ff_ffv1_enc_bayer_golomb_comp_spv_data,
+                              ff_ffv1_enc_bayer_golomb_comp_spv_len, "main");
+        else
+            ff_vk_shader_link(&fv->s, shd,
+                              ff_ffv1_enc_bayer_comp_spv_data,
+                              ff_ffv1_enc_bayer_comp_spv_len, "main");
+    } else if (f->remap_mode) {
         if (fv->ctx.ac == AC_GOLOMB_RICE)
             ff_vk_shader_link(&fv->s, shd,
                               ff_ffv1_enc_rgb_float_golomb_comp_spv_data,
@@ -1288,9 +1303,9 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
             return AVERROR_INVALIDDATA;
     }
 
-    /* We target version 4.3 */
+    /* We target version 4.3 by default */
     if (f->version == 4)
-        f->micro_version = f->remap_mode ? 9 : 3;
+        f->micro_version = (f->remap_mode || f->bayer) ? 9 : 3;
 
     f->num_h_slices = fv->num_h_slices;
     f->num_v_slices = fv->num_v_slices;
@@ -1437,7 +1452,7 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
 
     /* Init rct search shader */
     fv->optimize_rct = fv->is_rgb && f->version >= 4 &&
-                       !fv->force_pcm && fv->optimize_rct;
+                       !fv->force_pcm && fv->optimize_rct && !f->bayer;
 
     /* Init shader specialization consts */
     SPEC_LIST_CREATE(sl, 19, 19*sizeof(uint32_t))
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 0425548978..3c47cbd58b 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -15,7 +15,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/ffv1_enc_setup.comp.spv.o \
                                       vulkan/ffv1_enc_remap.comp.spv.o \
                                       vulkan/ffv1_enc_rgb_float.comp.spv.o \
                                       vulkan/ffv1_enc_rgb_float_golomb.comp.spv.o \
-                                      vulkan/ffv1_enc_sort32.comp.spv.o
+                                      vulkan/ffv1_enc_sort32.comp.spv.o \
+                                      vulkan/ffv1_enc_bayer.comp.spv.o \
+                                      vulkan/ffv1_enc_bayer_golomb.comp.spv.o
 
 OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
                                       vulkan/ffv1_dec_reset.comp.spv.o \
diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl b/libavcodec/vulkan/ffv1_enc.comp.glsl
index 1c30e91828..a4a942782d 100644
--- a/libavcodec/vulkan/ffv1_enc.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc.comp.glsl
@@ -79,6 +79,9 @@ void encode_line_pcm(in SliceContext sc, readonly uimage2D img,
         return;
 
     int w = sc.slice_dim.x;
+#ifdef BAYER
+    w >>= 1;
+#endif
 
 #ifndef RGB
     if (p > 0 && p < 3) {
@@ -100,6 +103,9 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off,
                  uint8_t quant_table_idx, in int run_index)
 {
     int w = sc.slice_dim.x;
+#ifdef BAYER
+    w >>= 1;
+#endif
 
 #ifndef RGB
     if (p > 0 && p < 3) {
@@ -160,6 +166,9 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off,
                  uint8_t quant_table_idx, inout int run_index)
 {
     int w = sc.slice_dim.x;
+#ifdef BAYER
+    w >>= 1;
+#endif
 
 #ifndef RGB
     if (p > 0 && p < 3) {
@@ -301,6 +310,41 @@ void preload_rgb(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y,
     memoryBarrierImage();
     barrier();
 }
+
+#ifdef BAYER
+void preload_bayer(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
+{ /* Stage row y of 2x2 Bayer quads from src[0] into tmp, optionally applying the RCT */
+    int offset = rct_offset; /* bias added to gd and to the b/r differences below */
+
+    for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { /* invocations stride across the w quads */
+        ivec2 lpos = sp + LADDR(ivec2(x, y)); /* destination texel in tmp */
+        ivec2 src_pos = sc.slice_pos + ivec2(int(x) << 1, y << 1); /* top-left sample of quad (x, y) */
+
+        int r  = int(imageLoad(src[0], src_pos + ivec2(0, 0))[0]); /* NOTE(review): names imply an RGGB layout - confirm */
+        int gr = int(imageLoad(src[0], src_pos + ivec2(1, 0))[0]);
+        int gb = int(imageLoad(src[0], src_pos + ivec2(0, 1))[0]);
+        int b  = int(imageLoad(src[0], src_pos + ivec2(1, 1))[0]);
+
+        if (apply_rct) {
+            int gd = gr - gb; /* difference of the two green samples */
+            int gm = gb + (gd >> 1); /* gb plus half of that difference (arithmetic shift) */
+            b -= gm; /* b and r become differences against gm */
+            r -= gm;
+            gm += (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2; /* per-slice weighted correction from the b/r differences */
+            b += offset;
+            r += offset;
+            gd += offset;
+            gr = gm; /* reuse gr/gb so the single store below serves both paths */
+            gb = gd;
+        }
+
+        imageStore(tmp, lpos, ivec4(gr, gb, b, r)); /* (gm, gd, b, r) with the RCT, raw (gr, gb, b, r) without */
+    }
+
+    memoryBarrierImage(); /* image-memory barrier for the tmp stores, then synchronise the workgroup */
+    barrier();
+}
+#endif
 #endif
 
 void encode_slice(in SliceContext sc, uint slice_idx)
@@ -308,13 +352,39 @@ void encode_slice(in SliceContext sc, uint slice_idx)
     ivec2 sp = sc.slice_pos;
     u16vec4 bits = get_slice_bits(sc);
 
-#ifdef RGB
+#ifdef BAYER
+    int bayer_w = sc.slice_dim.x >> 1;
+    int bayer_h = sc.slice_dim.y >> 1;
+    sp.x >>= 1;
+    sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
+    /* c_bits = bps + 1 for is_rgb pixfmts (Bayer is treated as RGB). gm uses
+     * raw bps; gd/b-gm/r-gm need an extra bit for the RCT difference. PCM
+     * stores raw samples so all planes use bps. */
+    if (sc.slice_coding_mode == 0)
+        bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits);
+    else
+        bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1);
+#elif defined(RGB)
     sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
 #endif
 
 #ifndef GOLOMB
     if (force_pcm) {
-#ifndef RGB
+#ifdef BAYER
+        for (int y = 0; y < bayer_h; y++) {
+            preload_bayer(sc, sp, bayer_w, y, false);
+
+            for (uint c = 0; c < 4; c++)
+                encode_line_pcm(sc, tmp, sp, y, 0, c);
+        }
+#elif defined(RGB)
+        for (int y = 0; y < sc.slice_dim.y; y++) {
+            preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, false);
+
+            for (uint c = 0; c < color_planes; c++)
+                encode_line_pcm(sc, tmp, sp, y, 0, rgb_plane_order[c]);
+        }
+#else
         for (int c = 0; c < color_planes; c++) {
 
             int h = sc.slice_dim.y;
@@ -328,27 +398,45 @@ void encode_slice(in SliceContext sc, uint slice_idx)
             for (int y = 0; y < h; y++)
                 encode_line_pcm(sc, src[p], sp, y, p, comp);
         }
-#else
-        for (int y = 0; y < sc.slice_dim.y; y++) {
-            preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, false);
-
-            for (uint c = 0; c < color_planes; c++)
-                encode_line_pcm(sc, tmp, sp, y, 0, rgb_plane_order[c]);
-        }
 #endif
         return;
     }
 #endif
 
+#ifdef BAYER
+    u32vec4 slice_state_off = (slice_idx*codec_planes +
+                               uvec4(0, 2, 1, 1))*plane_state_size;
+#else
     u32vec4 slice_state_off = (slice_idx*codec_planes +
                                uvec4(0, 1, 1, 2))*plane_state_size;
+#endif
 
 #ifdef GOLOMB
     slice_state_off >>= 3;
     init_golomb();
 #endif
 
-#ifndef RGB
+#ifdef BAYER
+    int run_index = 0;
+    for (int y = 0; y < bayer_h; y++) {
+        preload_bayer(sc, sp, bayer_w, y, true);
+
+        for (uint c = 0; c < 4; c++)
+            encode_line(sc, tmp, slice_state_off[c],
+                        sp, y, 0, c, bits[c],
+                        U8(context_model), run_index);
+    }
+#elif defined(RGB)
+    int run_index = 0;
+    for (int y = 0; y < sc.slice_dim.y; y++) {
+        preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, true);
+
+        for (uint c = 0; c < color_planes; c++)
+            encode_line(sc, tmp, slice_state_off[c],
+                        sp, y, 0, rgb_plane_order[c], bits[c],
+                        U8(context_model), run_index);
+    }
+#else
     for (uint c = 0; c < color_planes; c++) {
         int run_index = 0;
 
@@ -363,16 +451,6 @@ void encode_slice(in SliceContext sc, uint slice_idx)
             encode_line(sc, src[p], slice_state_off[c], sp, y, p,
                         comp, bits[c], U8(context_model), run_index);
     }
-#else
-    int run_index = 0;
-    for (int y = 0; y < sc.slice_dim.y; y++) {
-        preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, true);
-
-        for (uint c = 0; c < color_planes; c++)
-            encode_line(sc, tmp, slice_state_off[c],
-                        sp, y, 0, rgb_plane_order[c], bits[c],
-                        U8(context_model), run_index);
-    }
 #endif
 }
 
diff --git a/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl b/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl
new file mode 100644
index 0000000000..fc463795f2
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl
@@ -0,0 +1,31 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 4) uniform uimage2D tmp;
+
+#define RGB
+#define BAYER
+#include "ffv1_enc.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl
new file mode 100644
index 0000000000..3c49fac3b7
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl
@@ -0,0 +1,32 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2026 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#pragma shader_stage(compute)
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_shader_image_load_formatted : require
+
+layout (set = 1, binding = 4) uniform uimage2D tmp;
+
+#define RGB
+#define BAYER
+#define GOLOMB
+#include "ffv1_enc.comp.glsl"
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
index e931019a43..de2d459536 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl
@@ -252,17 +252,19 @@ void write_slice_header(uint slice_idx, inout SliceContext sc)
     if (version >= 4) {
         put_rac(rc_state[0], force_pcm);
         put_usymbol(uint(force_pcm), 0);
-        if (!force_pcm && colorspace == 1) {
+        if (!force_pcm && colorspace != 0) {
             put_usymbol(sc.slice_rct_coef.g, 0);
             put_usymbol(sc.slice_rct_coef.r, 0);
         }
 
-        if (remap_mode != 0) {
+        if (micro_version >= 4) {
             put_usymbol(remap_mode, 0);
-            if (c_bits >= 32)
-                encode_float32_remap(slice_idx, sc);
-            else
-                encode_histogram_remap(slice_idx, sc);
+            if (remap_mode != 0) {
+                if (c_bits >= 32)
+                    encode_float32_remap(slice_idx, sc);
+                else
+                    encode_histogram_remap(slice_idx, sc);
+            }
         }
     }
 }
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to