PR #23268 opened by Lynne URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23268 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23268.patch
Depends/based on #22528 Implements encoding and decoding of Bayer data in the Vulkan FFv1 decoder and encoder. From ff8eafa85b46dabb4d48d90f5e1087181aeb70fb Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Tue, 26 May 2026 11:38:29 +0900 Subject: [PATCH 1/8] vulkan/ffv1: add 32-bit float RGB encoding and a rice + remap path This implements 32-bit float RGB encoding and makes the Vulkan implementation on-par with the C implementation. Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1_vulkan.h | 1 + libavcodec/ffv1enc_vulkan.c | 153 ++++++++++++++++- libavcodec/vulkan/Makefile | 4 +- libavcodec/vulkan/ffv1_common.glsl | 1 + libavcodec/vulkan/ffv1_enc.comp.glsl | 27 ++- .../ffv1_enc_rgb_float_golomb.comp.glsl | 33 ++++ libavcodec/vulkan/ffv1_enc_setup.comp.glsl | 127 +++++++++++++- libavcodec/vulkan/ffv1_enc_sort32.comp.glsl | 155 ++++++++++++++++++ 8 files changed, 477 insertions(+), 24 deletions(-) create mode 100644 libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl create mode 100644 libavcodec/vulkan/ffv1_enc_sort32.comp.glsl diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h index 9a206afaca..d6ae0f3fee 100644 --- a/libavcodec/ffv1_vulkan.h +++ b/libavcodec/ffv1_vulkan.h @@ -48,6 +48,7 @@ typedef struct FFv1ShaderParams { int sar[2]; int pic_mode; uint32_t slice_size_max; + uint32_t max_pixels_per_slice; } FFv1ShaderParams; #endif /* AVCODEC_FFV1_VULKAN_H */ diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 92d46f7ddf..7c22ced785 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -72,6 +72,7 @@ typedef struct VulkanEncodeFFv1Context { FFVulkanShader rct_search; FFVulkanShader remap; + FFVulkanShader sort32; FFVulkanShader setup; FFVulkanShader reset; FFVulkanShader enc; @@ -101,6 +102,8 @@ typedef struct VulkanEncodeFFv1Context { int optimize_rct; int is_rgb; + int is_float32; + uint32_t max_pixels_per_slice; int ppi; int chunks; } VulkanEncodeFFv1Context; @@ -141,6 +144,12 
@@ extern const unsigned int ff_ffv1_enc_remap_comp_spv_len; extern const unsigned char ff_ffv1_enc_rgb_float_comp_spv_data[]; extern const unsigned int ff_ffv1_enc_rgb_float_comp_spv_len; +extern const unsigned char ff_ffv1_enc_rgb_float_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_enc_rgb_float_golomb_comp_spv_len; + +extern const unsigned char ff_ffv1_enc_sort32_comp_spv_data[]; +extern const unsigned int ff_ffv1_enc_sort32_comp_spv_len; + static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, AVFrame *enc_in, VkImageView *enc_in_views, FFVkBuffer *slice_data_buf, uint32_t slice_data_size, @@ -203,6 +212,37 @@ static int run_remap(AVCodecContext *avctx, FFVkExecContext *exec, return 0; } +static int run_sort32(AVCodecContext *avctx, FFVkExecContext *exec, + AVFrame *enc_in, VkImageView *enc_in_views, + FFVkBuffer *units_buf, uint32_t units_size, + FFv1ShaderParams *pd) +{ + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVulkanFunctions *vk = &fv->s.vkfn; + + /* Update descriptors */ + ff_vk_shader_update_img_array(&fv->s, exec, &fv->sort32, + enc_in, enc_in_views, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->sort32, + 1, 2, 0, + units_buf, + 0, units_size*f->slice_count, + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&fv->s, exec, &fv->sort32); + ff_vk_shader_update_push_const(&fv->s, exec, &fv->sort32, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(FFv1ShaderParams), pd); + + vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); + + return 0; +} + static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, FFVkExecContext *exec, const AVFrame *pict) @@ -279,15 +319,19 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, slice_data_buf = (FFVkBuffer *)slice_data_ref->data; if (f->remap_mode) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fv->s.frames->sw_format); - remap_data_size = 
4*(1 << desc->comp[0].depth)*sizeof(uint32_t); + if (fv->is_float32) { + /* Per (slice, plane): [units : max_pixels*2 uints] + [bitmap : max_pixels uints]. */ + remap_data_size = 4*fv->max_pixels_per_slice*3*sizeof(uint32_t); + } else { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fv->s.frames->sw_format); + remap_data_size = 4*(1 << desc->comp[0].depth)*sizeof(uint32_t); + } RET(ff_vk_get_pooled_buffer(&fv->s, &fv->remap_data_pool, &remap_data_ref, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, NULL, remap_data_size*f->slice_count, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); - remap_data_buf = (FFVkBuffer *)remap_data_ref->data; } @@ -348,6 +392,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 : !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1, .slice_size_max = out_data_buf->size / f->slice_count, + .max_pixels_per_slice = fv->max_pixels_per_slice, }; for (int i = 0; i < f->quant_table_count; i++) { @@ -420,8 +465,13 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, } if (f->remap_mode) { - RET(run_remap(avctx, exec, src, src_views, - remap_data_buf, remap_data_size, &pd)); + if (fv->is_float32) { + RET(run_sort32(avctx, exec, src, src_views, + remap_data_buf, remap_data_size, &pd)); + } else { + RET(run_remap(avctx, exec, src, src_views, + remap_data_buf, remap_data_size, &pd)); + } /* Make sure the writes are visible to the setup shader */ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], remap_data_buf, @@ -519,6 +569,14 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR, COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT, 0, slice_data_size*f->slice_count); + + /* Setup writes the per-pixel compact_idx (or compact_idx-of-value) + * back into the remap buffer; the encode shader reads it. 
*/ + if (f->remap_mode) + ff_vk_buf_barrier(buf_bar[nb_buf_bar++], remap_data_buf, + COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT, + COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR, + 0, remap_data_size*f->slice_count); if (f->key_frame || fv->force_pcm) ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf, COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR, @@ -906,6 +964,54 @@ fail: return err; } +static int init_sort32_shader(AVCodecContext *avctx, VkSpecializationInfo *sl) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFVulkanShader *shd = &fv->sort32; + + uint32_t wg_x = FFMIN(fv->max_pixels_per_slice, 256); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { wg_x, 1, 1 }, 0); + + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams), + VK_SHADER_STAGE_COMPUTE_BIT); + + const FFVulkanDescriptorSetBinding desc_set_const[] = { + { /* rangecoder_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set_const, 1, 1, 0); + + const FFVulkanDescriptorSetBinding desc_set[] = { + { /* slice_data_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { /* src */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), + }, + { /* units */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0); + + RET(ff_vk_shader_link(&fv->s, shd, + ff_ffv1_enc_sort32_comp_spv_data, + ff_ffv1_enc_sort32_comp_spv_len, "main")); + + RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); + +fail: + return err; +} + static int init_remap_shader(AVCodecContext *avctx, VkSpecializationInfo *sl) { int err; @@ -1105,9 +1211,14 @@ static int init_encode_shader(AVCodecContext *avctx, 
VkSpecializationInfo *sl) 4 + fv->is_rgb + !!f->remap_mode, 0, 0); if (f->remap_mode) { - ff_vk_shader_link(&fv->s, shd, - ff_ffv1_enc_rgb_float_comp_spv_data, - ff_ffv1_enc_rgb_float_comp_spv_len, "main"); + if (fv->ctx.ac == AC_GOLOMB_RICE) + ff_vk_shader_link(&fv->s, shd, + ff_ffv1_enc_rgb_float_golomb_comp_spv_data, + ff_ffv1_enc_rgb_float_golomb_comp_spv_len, "main"); + else + ff_vk_shader_link(&fv->s, shd, + ff_ffv1_enc_rgb_float_comp_spv_data, + ff_ffv1_enc_rgb_float_comp_spv_len, "main"); } else if (fv->ctx.ac == AC_GOLOMB_RICE) { if (fv->is_rgb) ff_vk_shader_link(&fv->s, shd, @@ -1304,6 +1415,26 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) && !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); + fv->is_float32 = (avctx->sw_pix_fmt == AV_PIX_FMT_GBRPF32 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRAPF32); + + if (fv->is_float32) { + /* Compute the worst-case slice geometry. With version >= 4 the slice + * boundaries are computed via slice_coord() which rounds up, so any + * single slice has at most ceil(width/num_h_slices) * ceil(height/num_v_slices) + * pixels. 
*/ + uint32_t mw = (avctx->width + f->num_h_slices - 1) / f->num_h_slices; + uint32_t mh = (avctx->height + f->num_v_slices - 1) / f->num_v_slices; + /* Round up to next pow2 for bitonic sort */ + uint32_t n = 1; + uint32_t pn = mw*mh; + while (n < pn) + n <<= 1; + if (n < 2) + n = 2; + fv->max_pixels_per_slice = n; + } + /* Init rct search shader */ fv->optimize_rct = fv->is_rgb && f->version >= 4 && !fv->force_pcm && fv->optimize_rct; @@ -1325,7 +1456,10 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) } if (f->remap_mode) { - err = init_remap_shader(avctx, sl); + if (fv->is_float32) + err = init_sort32_shader(avctx, sl); + else + err = init_remap_shader(avctx, sl); if (err < 0) return err; } @@ -1420,6 +1554,7 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) ff_vk_shader_free(&fv->s, &fv->reset); ff_vk_shader_free(&fv->s, &fv->setup); ff_vk_shader_free(&fv->s, &fv->remap); + ff_vk_shader_free(&fv->s, &fv->sort32); ff_vk_shader_free(&fv->s, &fv->rct_search); if (fv->exec_ctx_info) { diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index c6817967c7..f86931727d 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -13,7 +13,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/ffv1_enc_setup.comp.spv.o \ vulkan/ffv1_enc_rgb_golomb.comp.spv.o \ vulkan/ffv1_enc_rct_search.comp.spv.o \ vulkan/ffv1_enc_remap.comp.spv.o \ - vulkan/ffv1_enc_rgb_float.comp.spv.o + vulkan/ffv1_enc_rgb_float.comp.spv.o \ + vulkan/ffv1_enc_rgb_float_golomb.comp.spv.o \ + vulkan/ffv1_enc_sort32.comp.spv.o OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \ vulkan/ffv1_dec_reset.comp.spv.o \ diff --git a/libavcodec/vulkan/ffv1_common.glsl b/libavcodec/vulkan/ffv1_common.glsl index 8580a0777f..3d3b6753c6 100644 --- a/libavcodec/vulkan/ffv1_common.glsl +++ b/libavcodec/vulkan/ffv1_common.glsl @@ -75,6 +75,7 @@ layout (push_constant, scalar) uniform pushConstants { ivec2 sar; int pic_mode; uint 
slice_size_max; + uint max_pixels_per_slice; }; #include "rangecoder.glsl" diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl b/libavcodec/vulkan/ffv1_enc.comp.glsl index 90ce8293b9..1c30e91828 100644 --- a/libavcodec/vulkan/ffv1_enc.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc.comp.glsl @@ -40,8 +40,8 @@ layout (set = 1, binding = 1, scalar) writeonly buffer slice_results_buf { * denormals before we get to look at them. */ layout (set = 1, binding = 3) uniform uimage2D src[]; #ifdef FLOAT -layout (set = 1, binding = 5) readonly buffer fltmap_buf { - uint fltmap[][4][65536]; +layout (set = 1, binding = 5, scalar) readonly buffer fltmap_buf { + uint fltmap[]; }; #endif @@ -239,11 +239,24 @@ ivec4 load_components(uint slice_idx, in SliceContext sc, ivec2 pos) { ivec4 pix; #ifdef FLOAT - /* Source view is r16_uint so imageLoad returns the raw fp16 bit pattern - * in .x; no conversion is performed and denormals survive. */ - for (int i = 0; i < color_planes; i++) { - uint iv = imageLoad(src[i], pos)[0] & 0xFFFFu; - pix[i] = int(fltmap[slice_idx][i][iv]); + if (c_bits >= 32) { + /* 32-bit float: per-pixel-position bitmap lookup. The bitmap region + * follows the units region in the same buffer. */ + ivec2 rel = pos - sc.slice_pos; + uint pixel_idx = uint(rel.x + sc.slice_dim.x*rel.y); + uint plane_stride = max_pixels_per_slice*3u; + for (int i = 0; i < color_planes; i++) { + uint base = (slice_idx*4u + uint(i))*plane_stride + + max_pixels_per_slice*2u; + pix[i] = int(fltmap[base + pixel_idx]); + } + } else { + /* 16-bit float: value-indexed lookup. Source view is r16_uint so + * imageLoad returns the raw fp16 bit pattern in .x. 
*/ + for (int i = 0; i < color_planes; i++) { + uint iv = imageLoad(src[i], pos)[0] & 0xFFFFu; + pix[i] = int(fltmap[(slice_idx*4u + uint(i))*65536u + iv]); + } } #else pix = ivec4(imageLoad(src[0], pos)); diff --git a/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl new file mode 100644 index 0000000000..e4535eb08f --- /dev/null +++ b/libavcodec/vulkan/ffv1_enc_rgb_float_golomb.comp.glsl @@ -0,0 +1,33 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require + +layout (set = 1, binding = 4) uniform uimage2D tmp; + +#define PB_UNALIGNED +#define GOLOMB +#define FLOAT +#define RGB +#include "ffv1_enc.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl index 53a8d7f13f..e931019a43 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl @@ -23,13 +23,13 @@ #pragma shader_stage(compute) #extension GL_GOOGLE_include_directive : require -#define NB_CONTEXTS 2 +#define NB_CONTEXTS 6 #define FULL_RENORM #include "common.glsl" #include "ffv1_common.glsl" -layout (set = 1, binding = 1) buffer fltmap_buf { - uint fltmap[][4][65536]; +layout (set = 1, binding = 1, scalar) buffer fltmap_buf { + uint fltmap[]; }; void init_slice(inout SliceContext sc, uint slice_idx) @@ -81,6 +81,7 @@ void encode_histogram_remap(uint slice_idx, inout SliceContext sc) const int flip = (remap_mode == 2) ? 0x7FFF : 0; for (int p = 0; p < color_planes; p++) { + const uint base = (slice_idx*4u + uint(p))*65536u; uint j = 0; uint lu = 0; int run = 0; @@ -90,15 +91,15 @@ void encode_histogram_remap(uint slice_idx, inout SliceContext sc) put_usymbol(0, 0); - for (int i = 0; i < NB_CONTEXTS; i++) + for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++) rc_state[i] = uint8_t(128); int cnt = 0; for (int i = 0; i < rct_offset; i++) { int ri = i ^ (((i & 0x8000) != 0) ? 
0 : flip); - uint u = uint(fltmap[slice_idx][p][ri] != 0); + uint u = uint(fltmap[base + uint(ri)] != 0u); - fltmap[slice_idx][p][ri] = uint16_t(j); + fltmap[base + uint(ri)] = j; j += u; if (lu == u) { @@ -117,6 +118,115 @@ void encode_histogram_remap(uint slice_idx, inout SliceContext sc) } } +/* The 32-bit float remap uses 6 contexts: state[lu][category][bit] with + * lu = 0,1 and category = 0 (run/step-1), 1 (delta -- unused here), 2 (mul). */ +#define CTX_F32(lu, cat) ((uint(lu)*3u + uint(cat))*CONTEXT_SIZE) + +void encode_float32_remap(uint slice_idx, inout SliceContext sc) +{ + const uint slice_w = uint(sc.slice_dim.x); + const uint slice_h = uint(sc.slice_dim.y); + const uint pixel_num = slice_w * slice_h; + const uint plane_stride = max_pixels_per_slice*3u; + + for (int p = 0; p < color_planes; p++) { + /* Layout: per (slice, plane) we have [units : max_pixels*8 bytes] + * followed by [bitmap : max_pixels*4 bytes]. The units region is + * read-only here, the bitmap region is written. */ + const uint plane_base = (slice_idx*4u + uint(p))*plane_stride; + const uint bitmap_base = plane_base + max_pixels_per_slice*2u; + + for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++) + rc_state[i] = uint8_t(128); + + put_usymbol(1, CTX_F32(0, 0)); + + for (int i = 0; i < NB_CONTEXTS*CONTEXT_SIZE; i++) + rc_state[i] = uint8_t(128); + + /* last_val is the last unique value (or 0xFFFFFFFF as the "before + * any value" sentinel; this lets step = val - last_val give val+1 + * for the first emission via unsigned wraparound). */ + uint last_val = 0xFFFFFFFFu; + uint lu = 0; + uint run = 0; + int ci = -1; + bool emit_first_mul = true; + + for (uint i = 0; i < pixel_num; i++) { + uint u_val = fltmap[plane_base + 2u*i + 0u]; + uint u_ndx = fltmap[plane_base + 2u*i + 1u]; + + /* Duplicate of the previous unique value? Reuse ci. 
*/ + if (i > 0u && last_val == u_val) { + fltmap[bitmap_base + u_ndx] = uint(ci); + continue; + } + + uint step = u_val - last_val; + + if (lu == 0u) { + put_usymbol(step - 1u, CTX_F32(0, 0)); + + if (emit_first_mul) { + put_usymbol(1, CTX_F32(0, 2)); + emit_first_mul = false; + } + + last_val = u_val; + if (step == 1u) { + lu = 1; + run = 0; + } + } else { + if (step == 1u) { + run++; + last_val = u_val; + } else { + if (run > 0u) { + put_usymbol(run, CTX_F32(1, 0)); + put_usymbol(0, CTX_F32(1, 0)); + last_val += 2u; + } else { + put_usymbol(0, CTX_F32(1, 0)); + last_val += 1u; + } + lu = 0; + run = 0; + + step = u_val - last_val; + put_usymbol(step - 1u, CTX_F32(0, 0)); + + last_val = u_val; + if (step == 1u) { + lu = 1; + run = 0; + } + } + } + + ci++; + fltmap[bitmap_base + u_ndx] = uint(ci); + } + + if (lu == 1u) { + if (run > 0u) { + put_usymbol(run, CTX_F32(1, 0)); + put_usymbol(0, CTX_F32(1, 0)); + last_val += 2u; + } else { + put_usymbol(0, CTX_F32(1, 0)); + last_val += 1u; + } + } + + if (last_val != 0xFFFFFFFFu) + put_usymbol(0xFFFFFFFFu - last_val, CTX_F32(0, 0)); + + sc.remap_count[p] = ci + 1; + } +} + void write_slice_header(uint slice_idx, inout SliceContext sc) { [[unroll]] @@ -149,7 +259,10 @@ void write_slice_header(uint slice_idx, inout SliceContext sc) if (remap_mode != 0) { put_usymbol(remap_mode, 0); - encode_histogram_remap(slice_idx, sc); + if (c_bits >= 32) + encode_float32_remap(slice_idx, sc); + else + encode_histogram_remap(slice_idx, sc); } } } diff --git a/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl b/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl new file mode 100644 index 0000000000..872c7daa2b --- /dev/null +++ b/libavcodec/vulkan/ffv1_enc_sort32.comp.glsl @@ -0,0 +1,155 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define SB_QUALI readonly +#include "common.glsl" +#include "ffv1_common.glsl" + +layout (set = 1, binding = 1) uniform uimage2D src[]; + +layout (set = 1, binding = 2, scalar) buffer fltmap_buf { + uint fltmap[]; +}; + +/* The shared fltmap_buf is laid out per (slice, plane) as a + * [max_pixels_per_slice*3] uint block, where the first + * [max_pixels_per_slice*2] entries hold interleaved (val, ndx) pairs and + * the trailing [max_pixels_per_slice] entries are the bitmap region used + * by the setup/encode shaders. Padding past pixel_num is the sentinel + * (UINT32_MAX, UINT32_MAX) so it sorts to the end. */ + +/* Per-workgroup bitonic-sort buffer. Limits a slice's pow2 size; large + * slices fall back to working in global memory. 
*/ +shared u32vec2 smem[8192]; + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + uvec2 img_size = imageSize(src[0]); + + uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0, + gl_NumWorkGroups.x, 0); + uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1, + gl_NumWorkGroups.x, 0); + uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0, + gl_NumWorkGroups.y, 0); + uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1, + gl_NumWorkGroups.y, 0); + + uint slice_w = sxe - sxs; + uint slice_h = sye - sys; + uint pixel_num = slice_w * slice_h; + + /* Round up to next pow2 for bitonic sort */ + uint N = 1; + while (N < pixel_num) + N <<= 1; + N = max(N, 2); + if (N > max_pixels_per_slice) + N = max_pixels_per_slice; + + const uint plane_stride = max_pixels_per_slice*3u; + const bool use_smem = N <= 8192u; + + for (int p = 0; p < color_planes; p++) { + uint base = (slice_idx*4u + uint(p))*plane_stride; + + /* Load pixels */ + for (uint i = gl_LocalInvocationIndex; i < N; + i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) { + uint v, ndx; + if (i < pixel_num) { + uint y = i / slice_w; + uint x = i - y*slice_w; + /* Source is bound as r32ui (FF_VK_REP_NATIVE for r32_sfloat) so + * imageLoad returns the raw bit pattern of the float. */ + v = imageLoad(src[p], ivec2(sxs + x, sys + y))[0]; + if (remap_mode == 2) + v = ((v & 0x80000000u) != 0u) ? v : (v ^ 0x7FFFFFFFu); + ndx = i; + } else { + v = 0xFFFFFFFFu; + ndx = 0xFFFFFFFFu; + } + if (use_smem) { + smem[i] = u32vec2(v, ndx); + } else { + fltmap[base + 2u*i + 0u] = v; + fltmap[base + 2u*i + 1u] = ndx; + } + } + barrier(); + if (!use_smem) memoryBarrierBuffer(); + + /* Bitonic sort of the (val, ndx) pairs. 
*/ + for (uint k = 2; k <= N; k <<= 1) { + for (uint j = k >> 1; j > 0; j >>= 1) { + for (uint i = gl_LocalInvocationIndex; i < N; + i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) { + uint partner = i ^ j; + if (partner > i) { + bool ascending = (i & k) == 0; + u32vec2 a, b; + if (use_smem) { + a = smem[i]; + b = smem[partner]; + } else { + a = u32vec2(fltmap[base + 2u*i + 0u], + fltmap[base + 2u*i + 1u]); + b = u32vec2(fltmap[base + 2u*partner + 0u], + fltmap[base + 2u*partner + 1u]); + } + bool a_gt_b = (a.x > b.x) || + (a.x == b.x && a.y > b.y); + if (a_gt_b == ascending) { + if (use_smem) { + smem[i] = b; + smem[partner] = a; + } else { + fltmap[base + 2u*i + 0u] = b.x; + fltmap[base + 2u*i + 1u] = b.y; + fltmap[base + 2u*partner + 0u] = a.x; + fltmap[base + 2u*partner + 1u] = a.y; + } + } + } + } + barrier(); + if (!use_smem) memoryBarrierBuffer(); + } + } + + /* Write sorted pairs back to global */ + if (use_smem) { + for (uint i = gl_LocalInvocationIndex; i < N; + i += gl_WorkGroupSize.x * gl_WorkGroupSize.y) { + u32vec2 u = smem[i]; + fltmap[base + 2u*i + 0u] = u.x; + fltmap[base + 2u*i + 1u] = u.y; + } + barrier(); + } + } +} -- 2.52.0 From 0dedba039b239e4f4590c05b755cacc05549cf88 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Tue, 17 Mar 2026 13:37:39 +0100 Subject: [PATCH 2/8] ffv1enc: implement Bayer pixel format encoding Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1.c | 6 ++- libavcodec/ffv1.h | 2 + libavcodec/ffv1enc.c | 110 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 110 insertions(+), 8 deletions(-) diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c index 812989a892..1dcb7be28d 100644 --- a/libavcodec/ffv1.c +++ b/libavcodec/ffv1.c @@ -126,6 +126,8 @@ int ff_need_new_slices(int width, int num_h_slices, int chroma_shift) { int ff_slice_coord(const FFV1Context *f, int width, int sx, int num_h_slices, int chroma_shift) { int mpw = 1<<chroma_shift; + if (f->bayer) + mpw = FFMAX(mpw, 2); int awidth = FFALIGN(width, 
mpw); if (f->combined_version <= 0x40002) @@ -233,7 +235,7 @@ void ff_ffv1_compute_bits_per_plane(const FFV1Context *f, FFV1SliceContext *sc, av_assert0(bits_per_raw_sample > 8); //breaks with lbd, needs review if added //bits with no RCT - for (int p=0; p<3+f->transparency; p++) { + for (int p=0; p<3+f->transparency+f->bayer; p++) { bits[p] = av_ceil_log2(sc->remap_count[p]); if (mask) mask[p] = (1<<bits[p]) - 1; @@ -246,6 +248,8 @@ void ff_ffv1_compute_bits_per_plane(const FFV1Context *f, FFV1SliceContext *sc, bits[0] = av_ceil_log2(FFMAX3(sc->remap_count[0], sc->remap_count[1], sc->remap_count[2])); bits[1] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[1]); bits[2] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[2]); + if (f->bayer) + bits[3] = av_ceil_log2(sc->remap_count[0] + sc->remap_count[3]); //old version coded a bit more than needed if (f->combined_version < 0x40008) { diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 8a48e8e682..012b92ec21 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -153,6 +153,8 @@ typedef struct FFV1Context { int flt; int remap_mode; int remap_optimizer; + int bayer; + int bayer_order; /* 0 = RGGB (only supported value for now) */ int maxsize_warned; int use32bit; diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index cd346495f7..5e5974c035 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -435,7 +435,7 @@ static void set_micro_version(FFV1Context *f) if (f->version == 3) { f->micro_version = 4; } else if (f->version == 4) { - f->micro_version = 9; + f->micro_version = 10; } else av_assert0(0); @@ -480,6 +480,8 @@ av_cold int ff_ffv1_write_extradata(AVCodecContext *avctx) put_symbol(&c, state, f->chroma_h_shift, 0); put_symbol(&c, state, f->chroma_v_shift, 0); put_rac(&c, state, f->transparency); + if (f->colorspace == 2) + put_symbol(&c, state, f->bayer_order, 0); /* 0 = RGGB */ put_symbol(&c, state, f->num_h_slices - 1, 0); put_symbol(&c, state, f->num_v_slices - 1, 0); @@ -566,9 
+568,9 @@ static int sort_stt(FFV1Context *s, uint8_t stt[256]) int ff_ffv1_encode_determine_slices(AVCodecContext *avctx) { FFV1Context *s = avctx->priv_data; - int plane_count = 1 + 2*s->chroma_planes + s->transparency; - int max_h_slices = AV_CEIL_RSHIFT(avctx->width , s->chroma_h_shift); - int max_v_slices = AV_CEIL_RSHIFT(avctx->height, s->chroma_v_shift); + int plane_count = 1 + 2*s->chroma_planes + s->bayer + s->transparency; + int max_h_slices = AV_CEIL_RSHIFT(avctx->width , s->bayer ? 1 : s->chroma_h_shift); + int max_v_slices = AV_CEIL_RSHIFT(avctx->height, s->bayer ? 1 : s->chroma_v_shift); s->num_v_slices = (avctx->width > 352 || avctx->height > 288 || !avctx->slices) ? 2 : 1; s->num_v_slices = FFMIN(s->num_v_slices, max_v_slices); for (; s->num_v_slices <= 32; s->num_v_slices++) { @@ -694,6 +696,8 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx) s->plane_count = 2; if (!s->chroma_planes && s->version > 3) s->plane_count--; + if (s->bayer) + s->plane_count = 3; s->picture_number = 0; @@ -804,6 +808,7 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx, FFV1Context *s = avctx->priv_data; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + s->bayer = 0; s->plane_count = 3; switch(pix_fmt) { case AV_PIX_FMT_GRAY9: @@ -911,6 +916,14 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx, s->use32bit = 1; s->version = FFMAX(s->version, 1); break; + case AV_PIX_FMT_BAYER_RGGB16: + s->colorspace = 2; + s->chroma_planes = 1; + s->bits_per_raw_sample = 16; + s->use32bit = 1; + s->version = FFMAX(s->version, 4); + s->bayer = 1; + break; case AV_PIX_FMT_GBRP: case AV_PIX_FMT_0RGB32: s->colorspace = 1; @@ -1106,7 +1119,7 @@ static void encode_slice_header(FFV1Context *f, FFV1SliceContext *sc) if (sc->slice_coding_mode == 1) ff_ffv1_clear_slice_state(f, sc); put_symbol(c, state, sc->slice_coding_mode, 0); - if (sc->slice_coding_mode != 1 && f->colorspace == 1) { + if (sc->slice_coding_mode != 1 && f->colorspace != 
0) { put_symbol(c, state, sc->slice_rct_by_coef, 0); put_symbol(c, state, sc->slice_rct_ry_coef, 0); } @@ -1569,6 +1582,86 @@ static int encode_float32_rgb_frame(FFV1Context *f, FFV1SliceContext *sc, return 0; } +static int encode_bayer_frame(FFV1Context *f, FFV1SliceContext *sc, + const uint8_t *src[4], + int w, int h, const int stride[4], int ac) +{ + const int pass1 = !!(f->avctx->flags & AV_CODEC_FLAG_PASS1); + const int ring_size = f->context_model ? 3 : 2; + TYPE *sample[4][3]; + + int bits[4], offset; + ff_ffv1_compute_bits_per_plane(f, sc, bits, &offset, NULL, f->bits_per_raw_sample); + + w >>= 1; + + sc->run_index = 0; + + for (int p = 0; p < MAX_PLANES; ++p) + sample[p][2] = RENAME(sc->sample_buffer); + + memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES * + (w + 6) * sizeof(*RENAME(sc->sample_buffer))); + + for (int y = 0; y < h; y += 2) { + for (int i = 0; i < ring_size; i++) + for (int p = 0; p < MAX_PLANES; p++) + sample[p][i] = RENAME(sc->sample_buffer) + p*ring_size*(w+6) + + ((h+i-y/2) % ring_size)*(w+6) + 3; + + for (int x = 0; x < w; x++) { + const uint16_t *l1 = ((const uint16_t*)(src[0] + stride[0]*(y + 0) + x*2*2)); + const uint16_t *l2 = ((const uint16_t*)(src[0] + stride[0]*(y + 1) + x*2*2)); + + int r, gr, gb, b; + r = l1[0]; + gr = l1[1]; + gb = l2[0]; + b = l2[1]; + + if (sc->slice_coding_mode != 1) { + /** + * Bayer 2x2 RCT, based on: + * "Reversible color transform for Bayer color filter array images", S. 
Poomrittigul et al, + * APSIPA Transactions on Signal and Information Processing (2013) 2 (1): 1-10, + * doi:10.1017/ATSIP.2013.6 */ + int gd = gr - gb; + int gm = gb + (gd >> 1); + + b -= gm; + r -= gm; + gm += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; + b += offset; + r += offset; + gd += offset; + + gr = gm; + gb = gd; + } + + sample[0][0][x] = gr; + sample[1][0][x] = gb; + sample[2][0][x] = b; + sample[3][0][x] = r; + } + + for (int p = 0; p < 4; p++) { + int ret; + sample[p][0][-1] = sample[p][1][0 ]; + sample[p][1][ w] = sample[p][1][w-1]; + /* Plane contexts: gm=0 (luma), b-gm/r-gm=1 (chroma diff from + * green), gd=2 (own context - green-green diff has different + * statistics from both luma and chroma). */ + ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], + p == 1 ? 2 : (p > 1), + bits[p], ac, pass1); + if (ret < 0) + return ret; + } + } + + return 0; +} static int encode_slice(AVCodecContext *c, void *arg) { @@ -1664,6 +1757,8 @@ retry: ret |= encode_plane(f, sc, p->data[0] + (ps>>1) + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 1, 2, ac); } else if (f->bits_per_raw_sample == 32) { ret = encode_float32_rgb_frame(f, sc, planes, width, height, p->linesize, ac); + } else if (f->bayer) { + ret = encode_bayer_frame(f, sc, planes, width, height, p->linesize, ac); } else if (f->use32bit) { ret = encode_rgb_frame32(f, sc, planes, width, height, p->linesize, ac); } else { @@ -1706,7 +1801,7 @@ size_t ff_ffv1_encode_buffer_size(AVCodecContext *avctx) if (f->version > 3) { maxsize *= f->bits_per_raw_sample + 1; if (f->remap_mode) - maxsize += f->slice_count * 70000 * (1 + 2*f->chroma_planes + f->transparency); + maxsize += f->slice_count * 70000 * (1 + 2*f->chroma_planes + f->bayer + f->transparency); } else { maxsize += f->slice_count * 2 * (avctx->width + avctx->height); //for bug with slices that code some pixels more than once maxsize *= 8*(2*f->bits_per_raw_sample + 5); @@ -1957,7 +2052,8 @@ const FFCodec 
ff_ffv1_encoder = { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YAF16, AV_PIX_FMT_GRAYF16, - AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32), + AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32, + AV_PIX_FMT_BAYER_RGGB16), .color_ranges = AVCOL_RANGE_MPEG, .p.priv_class = &ffv1_class, .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, -- 2.52.0 From 0128e4b0f0f6cc3ed6d366dbe501c1c8be32feac Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Tue, 17 Mar 2026 14:05:51 +0100 Subject: [PATCH 3/8] ffv1dec: implement Bayer pixel format decoding Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1_parse.c | 15 ++++++++- libavcodec/ffv1dec.c | 75 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/libavcodec/ffv1_parse.c b/libavcodec/ffv1_parse.c index 10f3652ff5..f53b5c8e84 100644 --- a/libavcodec/ffv1_parse.c +++ b/libavcodec/ffv1_parse.c @@ -117,7 +117,15 @@ int ff_ffv1_read_extra_header(FFV1Context *f) f->chroma_h_shift = ff_ffv1_get_symbol(&c, state, 0); f->chroma_v_shift = ff_ffv1_get_symbol(&c, state, 0); f->transparency = get_rac(&c, state); - f->plane_count = 1 + (f->chroma_planes || f->version<4) + f->transparency; + f->bayer = (f->colorspace == 2); + if (f->bayer) { + f->bayer_order = ff_ffv1_get_symbol(&c, state, 0); + if (f->bayer_order != 0) { + av_log(f->avctx, AV_LOG_ERROR, "Unsupported bayer order %d\n", f->bayer_order); + return AVERROR_PATCHWELCOME; + } + } + f->plane_count = 1 + (f->chroma_planes || f->version<4) + f->transparency + f->bayer; f->num_h_slices = 1 + ff_ffv1_get_symbol(&c, state, 0); f->num_v_slices = 1 + ff_ffv1_get_symbol(&c, state, 0); @@ -430,6 +438,11 @@ int ff_ffv1_parse_header(FFV1Context *f, RangeCoder *c, uint8_t *state) } f->use32bit = 1; } + } else if (f->colorspace == 2) { + if (f->avctx->bits_per_raw_sample == 16) { + f->pix_fmt = AV_PIX_FMT_BAYER_RGGB16; + f->use32bit = 1; + } } else { av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n"); 
return AVERROR(ENOSYS); diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 54fe769fca..6677e5c8b4 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -249,7 +249,7 @@ static int decode_slice_header(const FFV1Context *f, if (f->version > 3) { sc->slice_reset_contexts = get_rac(c, state); sc->slice_coding_mode = ff_ffv1_get_symbol(c, state, 0); - if (sc->slice_coding_mode != 1 && f->colorspace == 1) { + if (sc->slice_coding_mode != 1 && f->colorspace != 0) { sc->slice_rct_by_coef = ff_ffv1_get_symbol(c, state, 0); sc->slice_rct_ry_coef = ff_ffv1_get_symbol(c, state, 0); if ((uint64_t)sc->slice_rct_by_coef + (uint64_t)sc->slice_rct_ry_coef > 4) { @@ -374,6 +374,76 @@ static int decode_remap(FFV1Context *f, FFV1SliceContext *sc) return 0; } +static int decode_bayer_frame(FFV1Context *f, FFV1SliceContext *sc, + GetBitContext *gb, + uint8_t *src, int w, int h, int stride) +{ + int x, y, p; + TYPE *sample[4][2]; + int ac = f->ac; + unsigned mask[4]; + + int bits[4], offset; + ff_ffv1_compute_bits_per_plane(f, sc, bits, &offset, mask, f->avctx->bits_per_raw_sample); + + w >>= 1; + + if (sc->slice_coding_mode == 1) + ac = 1; + + for (x = 0; x < 4; x++) { + sample[x][0] = RENAME(sc->sample_buffer) + x * 2 * (w + 6) + 3; + sample[x][1] = RENAME(sc->sample_buffer) + (x * 2 + 1) * (w + 6) + 3; + } + + sc->run_index = 0; + + memset(RENAME(sc->sample_buffer), 0, 8 * (w + 6) * sizeof(*RENAME(sc->sample_buffer))); + + for (y = 0; y < h; y += 2) { + for (p = 0; p < 4; p++) { + int ret; + TYPE *temp = sample[p][0]; // FIXME: try a normal buffer + + sample[p][0] = sample[p][1]; + sample[p][1] = temp; + + sample[p][1][-1]= sample[p][0][0 ]; + sample[p][0][ w]= sample[p][0][w-1]; + ret = RENAME(decode_line)(f, sc, gb, w, sample[p], + p == 1 ? 
2 : (p > 1), bits[p], ac); + if (ret < 0) + return ret; + } + + for (x = 0; x < w; x++) { + int g_r = sample[0][1][x]; + int g_b = sample[1][1][x]; + int b = sample[2][1][x]; + int r = sample[3][1][x]; + + if (sc->slice_coding_mode != 1) { + b -= offset; + r -= offset; + g_r -= (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; + b += g_r; + r += g_r; + + /* Recover green pair: encoder stored gm = gb + (gd >> 1), gd = gr - gb */ + int gd = g_b - offset; + g_b = g_r - (gd >> 1); + g_r = g_b + gd; + } + + *((uint16_t*)(src + (x*2 + 0)*2 + stride*(y + 0))) = r; + *((uint16_t*)(src + (x*2 + 1)*2 + stride*(y + 0))) = g_r; + *((uint16_t*)(src + (x*2 + 0)*2 + stride*(y + 1))) = g_b; + *((uint16_t*)(src + (x*2 + 1)*2 + stride*(y + 1))) = b; + } + } + return 0; +} + static int decode_slice(AVCodecContext *c, void *arg) { FFV1Context *f = c->priv_data; @@ -449,6 +519,9 @@ static int decode_slice(AVCodecContext *c, void *arg) } else if (f->colorspace == 0) { decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0] , width, height, p->linesize[0], 0, 0, 2, ac); decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0] + (ps>>1), width, height, p->linesize[0], 1, 1, 2, ac); + } else if (f->bayer) { + decode_bayer_frame(f, sc, &gb, p->data[0] + ps * x + y * p->linesize[0], + width, height, p->linesize[0]); } else if (f->use32bit) { uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0], p->data[1] + ps * x + y * p->linesize[1], -- 2.52.0 From 4d73d08274d7446c9ca4c9a50c852ab567c73d53 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Tue, 26 May 2026 17:26:18 +0900 Subject: [PATCH 4/8] ffv1enc: add RCT coefficient search for Bayer Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1enc.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index 5e5974c035..d31a2c19ed 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -1222,6 +1222,69 @@ 
static void choose_rct_params(const FFV1Context *f, FFV1SliceContext *sc, sc->slice_rct_ry_coef = rct_y_coeff[best][0]; } +static void choose_rct_params_bayer(const FFV1Context *f, FFV1SliceContext *sc, + const uint8_t *src[4], const int stride[4], + int w, int h) +{ + static const int rct_y_coeff[NB_Y_COEFF][2] = { + { 0, 0 }, { 1, 1 }, { 2, 2 }, { 0, 2 }, { 2, 0 }, { 4, 0 }, { 0, 4 }, { 0, 3 }, + { 3, 0 }, { 3, 1 }, { 1, 3 }, { 1, 2 }, { 2, 1 }, { 0, 1 }, { 1, 0 }, + }; + int stat[NB_Y_COEFF] = {0}; + int16_t *sample[3]; + int i, best; + + /* Walk in 2x2 blocks, build per-block gm/b/r, evaluate prediction-error */ + w >>= 1; + for (i = 0; i < 3; i++) + sample[i] = sc->sample_buffer + i*w; + + for (int y = 0; y < h; y += 2) { + int last_gm = 0, last_b = 0, last_r = 0; + for (int x = 0; x < w; x++) { + const uint16_t *l1 = (const uint16_t *)(src[0] + stride[0]*(y + 0) + x*2*2); + const uint16_t *l2 = (const uint16_t *)(src[0] + stride[0]*(y + 1) + x*2*2); + int r = l1[0]; + int gr = l1[1]; + int gb = l2[0]; + int b = l2[1]; + int gd = gr - gb; + int gm = gb + (gd >> 1); + + int agm = gm - last_gm; + int ab = b - last_b; + int ar = r - last_r; + + if (x && y) { + int bgm = agm - sample[0][x]; + int bb = ab - sample[1][x]; + int br = ar - sample[2][x]; + + br -= bgm; + bb -= bgm; + + for (i = 0; i < NB_Y_COEFF; i++) + stat[i] += FFABS(bgm + ((br*rct_y_coeff[i][0] + bb*rct_y_coeff[i][1]) >> 2)); + } + sample[0][x] = agm; + sample[1][x] = ab; + sample[2][x] = ar; + + last_gm = gm; + last_b = b; + last_r = r; + } + } + + best = 0; + for (i = 1; i < NB_Y_COEFF; i++) + if (stat[i] < stat[best]) + best = i; + + sc->slice_rct_by_coef = rct_y_coeff[best][1]; + sc->slice_rct_ry_coef = rct_y_coeff[best][0]; +} + static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc) { int len = 1 << f->bits_per_raw_sample; @@ -1686,6 +1749,8 @@ static int encode_slice(AVCodecContext *c, void *arg) sc->slice_coding_mode = 0; if (f->version > 3 && f->colorspace == 1) { 
choose_rct_params(f, sc, planes, p->linesize, width, height); + } else if (f->bayer) { + choose_rct_params_bayer(f, sc, planes, p->linesize, width, height); } else { sc->slice_rct_by_coef = 1; sc->slice_rct_ry_coef = 1; -- 2.52.0 From f3e8fba2a364569b606267f0862679c7eed34a72 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Wed, 27 May 2026 05:03:53 +0900 Subject: [PATCH 5/8] ffv1enc: write f->flt to extradata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parser has been reading f->flt for combined_version >= 0x40004 since commit c1b330bf24 (avcodec/ffv1: Basic float16 support), but ff_ffv1_write_extradata() never had a matching put_symbol(). The result was that the parsed f->flt was whatever the next symbol's worth of rangecoded bits happened to decode to — often 0, but for a yuv420p16le -level 4 -strict experimental stream produced locally it parses as 1. The software decoder doesn't notice because the YUV pixfmt-selection branches never check f->flt, but anything else that trusts it gets garbage. 
Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1enc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index d31a2c19ed..eb55d3ba32 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -506,6 +506,8 @@ av_cold int ff_ffv1_write_extradata(AVCodecContext *avctx) if (f->version > 2) { put_symbol(&c, state, f->ec, 0); put_symbol(&c, state, f->intra = (f->avctx->gop_size < 2), 0); + if (f->combined_version >= 0x40004) + put_symbol(&c, state, f->flt, 0); } f->avctx->extradata_size = ff_rac_terminate(&c, 0); -- 2.52.0 From 8f3e8128c4660a0c5cc1c7060564ac5468b13cc7 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Wed, 27 May 2026 04:02:37 +0900 Subject: [PATCH 6/8] vulkan_ffv1: add Bayer decoder Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1_vulkan.c | 9 ++ libavcodec/vulkan/Makefile | 4 +- libavcodec/vulkan/ffv1_common.glsl | 2 + libavcodec/vulkan/ffv1_dec.comp.glsl | 82 ++++++++++++++++++- libavcodec/vulkan/ffv1_dec_bayer.comp.glsl | 31 +++++++ .../vulkan/ffv1_dec_bayer_golomb.comp.glsl | 32 ++++++++ libavcodec/vulkan/ffv1_dec_setup.comp.glsl | 2 +- libavcodec/vulkan/ffv1_vlc.glsl | 2 +- libavcodec/vulkan_ffv1.c | 33 ++++++-- 9 files changed, 184 insertions(+), 13 deletions(-) create mode 100644 libavcodec/vulkan/ffv1_dec_bayer.comp.glsl create mode 100644 libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c index 73c2b2a7ce..81843b1701 100644 --- a/libavcodec/ffv1_vulkan.c +++ b/libavcodec/ffv1_vulkan.c @@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, FFV1Context *f, } int bits = desc->comp[0].depth; + /* Bayer pixfmts report misleading per-component depth in comp[0].depth + * (it counts the fraction of bits each component contributes per output + * pixel, not the per-sample bit width). Use bits_per_raw_sample. 
The + * encoder fills f->bits_per_raw_sample directly; the decoder only + * fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field + * with the avctx field as a fallback so this works from both sides. */ + if (f->bayer) + bits = f->bits_per_raw_sample ? f->bits_per_raw_sample + : f->avctx->bits_per_raw_sample; SPEC_LIST_ADD(sl, 5, 32, (uint32_t)(1ULL << bits)); SPEC_LIST_ADD(sl, 6, 32, f->colorspace); SPEC_LIST_ADD(sl, 7, 32, f->transparency); diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index f86931727d..0425548978 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \ vulkan/ffv1_dec_rgb.comp.spv.o \ vulkan/ffv1_dec_rgb_golomb.comp.spv.o \ vulkan/ffv1_dec_rgb_float.comp.spv.o \ - vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o + vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \ + vulkan/ffv1_dec_bayer.comp.spv.o \ + vulkan/ffv1_dec_bayer_golomb.comp.spv.o OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += vulkan/prores_ks_alpha_data.comp.spv.o \ vulkan/prores_ks_slice_data.comp.spv.o \ diff --git a/libavcodec/vulkan/ffv1_common.glsl b/libavcodec/vulkan/ffv1_common.glsl index 3d3b6753c6..36bce88a4a 100644 --- a/libavcodec/vulkan/ffv1_common.glsl +++ b/libavcodec/vulkan/ffv1_common.glsl @@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer slice_ctx_buf { uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) { uint mpw = 1 << chroma_shift; + if (colorspace == 2) + mpw = max(mpw, 2u); uint awidth = align(width, mpw); if ((version < 4) || ((version == 4) && (micro_version < 3))) diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl index 82835e8f92..2527f988f2 100644 --- a/libavcodec/vulkan/ffv1_dec.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -247,6 +247,48 @@ void decode_line(ivec2 sp, int w, } #endif +#ifdef BAYER +void writeout_bayer(uint slice_idx, 
in SliceContext sc, ivec2 sp, int w, int y) +{ + memoryBarrierImage(); + barrier(); + + int offset = rct_offset; + + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { + ivec2 lpos = sp + LADDR(ivec2(x, y)); + ivec2 pos = sc.slice_pos + ivec2(int(x) << 1, y << 1); + + /* Plane order set by encoder (Variant A): + * dec[0]=gm (or gr in PCM), dec[1]=gd (or gb in PCM), + * dec[2]=b, dec[3]=r */ + int g_r = int(imageLoad(dec[0], lpos)[0]); + int g_b = int(imageLoad(dec[1], lpos)[0]); + int b = int(imageLoad(dec[2], lpos)[0]); + int r = int(imageLoad(dec[3], lpos)[0]); + + if (sc.slice_coding_mode != 1) { + b -= offset; + r -= offset; + g_r -= (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2; + b += g_r; + r += g_r; + + /* Recover green pair: encoder stored gm = gb + (gd >> 1), + * gd = gr - gb (with +offset on gd). */ + int gd = g_b - offset; + g_b = g_r - (gd >> 1); + g_r = g_b + gd; + } + + imageStore(dst[0], pos + ivec2(0, 0), uvec4(r)); + imageStore(dst[0], pos + ivec2(1, 0), uvec4(g_r)); + imageStore(dst[0], pos + ivec2(0, 1), uvec4(g_b)); + imageStore(dst[0], pos + ivec2(1, 1), uvec4(b)); + } +} +#endif + #ifdef RGB ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset) { @@ -319,14 +361,32 @@ void decode_slice(in SliceContext sc, uint slice_idx) ivec2 sp = sc.slice_pos; u16vec4 bits = get_slice_bits(sc); -#ifdef RGB +#ifdef BAYER + /* Bayer logical dims: 2x2 blocks at half resolution */ + w >>= 1; + int bayer_h = sc.slice_dim.y >> 1; + sp.x >>= 1; + sp.y = int(gl_WorkGroupID.y)*rgb_linecache; + /* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use + * raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. 
*/ + if (sc.slice_coding_mode == 0) + bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits); + else + bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1); +#elif defined(RGB) sp.y = int(gl_WorkGroupID.y)*rgb_linecache; #endif #ifndef GOLOMB /* PCM coding */ if (sc.slice_coding_mode == 1) { -#ifdef RGB +#ifdef BAYER + for (int y = 0; y < bayer_h; y++) { + for (int p = 0; p < 4; p++) + decode_line_pcm(sp, w, y, p); + writeout_bayer(slice_idx, sc, sp, w, y); + } +#elif defined(RGB) for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) decode_line_pcm(sp, w, y, p); @@ -347,16 +407,32 @@ void decode_slice(in SliceContext sc, uint slice_idx) } #endif +#ifdef BAYER + /* Variant A plane-context mapping: gm=0, gd=2, b-gm=1, r-gm=1 */ + u8vec4 quant_table_idx = sc.quant_table_idx.xzyy; + u32vec4 slice_state_off = (slice_idx*codec_planes + + uvec4(0, 2, 1, 1))*plane_state_size; +#else u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; +#endif #ifdef GOLOMB slice_state_off >>= 3; // division by VLC_STATE_SIZE golomb_init(); #endif -#ifdef RGB +#ifdef BAYER + int run_index = 0; + for (int y = 0; y < bayer_h; y++) { + for (int p = 0; p < 4; p++) + decode_line(sp, w, y, p, bits[p], + slice_state_off[p], quant_table_idx[p], run_index); + + writeout_bayer(slice_idx, sc, sp, w, y); + } +#elif defined(RGB) int run_index = 0; for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) diff --git a/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl new file mode 100644 index 0000000000..6ceb15a35a --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl @@ -0,0 +1,31 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require + +layout (set = 1, binding = 5) writeonly uniform uimage2D dst[]; + +#define RGB +#define BAYER +#include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl new file mode 100644 index 0000000000..ca8b7bada0 --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl @@ -0,0 +1,32 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require + +layout (set = 1, binding = 5) writeonly uniform uimage2D dst[]; + +#define RGB +#define BAYER +#define GOLOMB +#include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl index ff57c57dc3..d000116012 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl @@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext sc) if (version >= 4) { sc.slice_reset_contexts = get_rac(rc_state[0]); sc.slice_coding_mode = get_usymbol(0); - if (sc.slice_coding_mode != 1 && colorspace == 1) { + if (sc.slice_coding_mode != 1 && colorspace != 0) { sc.slice_rct_coef.g = int(get_usymbol(0)); sc.slice_rct_coef.r = int(get_usymbol(0)); if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4) diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl index f362d3afbb..51f3d7ddae 100644 --- a/libavcodec/vulkan/ffv1_vlc.glsl +++ b/libavcodec/vulkan/ffv1_vlc.glsl @@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v) int drift = state.drift; int count = state.count; int bias = state.bias; - state.error_sum += uint16_t(abs(v)); + state.error_sum += uint32_t(abs(v)); drift += v; if (count == 128) { // FIXME: variable diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 4056f3958a..e8dafb2505 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len; extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[]; extern const unsigned int 
ff_ffv1_dec_rgb_float_golomb_comp_spv_len; +extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len; + const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { .codec_id = AV_CODEC_ID_FFV1, .queue_flags = VK_QUEUE_COMPUTE_BIT, @@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) nb_img_bar = 0; nb_buf_bar = 0; - for (int i = 0; i < color_planes; i++) + /* The intermediate frame has 4 planes (GBRAP16/32). Clear all of + * them since the bayer decoder uses all four. */ + int n_dec_planes = f->bayer ? 4 : color_planes; + for (int i = 0; i < n_dec_planes; i++) vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL, &((VkClearColorValue) { 0 }), 1, &((VkImageSubresourceRange) { @@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) 1, 5, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - if (fltmap_buf) + if (fltmap_buf && !f->bayer) ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode, 1, 6, 0, fltmap_buf, @@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd, AVHWFramesContext *dec_frames_ctx, AVHWFramesContext *out_frames_ctx, - VkSpecializationInfo *sl, int ac, int rgb) + VkSpecializationInfo *sl, int ac, int rgb, + int bayer) { int err; @@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, }, }; ff_vk_shader_add_descriptor_set(s, shd, desc_set, - 5 + rgb + (f->micro_version >= 9), + 5 + rgb + (!bayer && f->micro_version >= 9), 0, 0); - if (f->version >=4 && f->micro_version >= 9) { + if (bayer) { + if (ac == AC_GOLOMB_RICE) + ff_vk_shader_link(s, shd, + ff_ffv1_dec_bayer_golomb_comp_spv_data, + ff_ffv1_dec_bayer_golomb_comp_spv_len, "main"); + else + ff_vk_shader_link(s, shd, + ff_ffv1_dec_bayer_comp_spv_data, + 
ff_ffv1_dec_bayer_comp_spv_len, "main"); + } else if (f->version >=4 && f->micro_version >= 9) { if (ac == AC_GOLOMB_RICE) ff_vk_shader_link(s, shd, ff_ffv1_dec_rgb_float_golomb_comp_spv_data, @@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) FFv1VulkanDecodeContext *fv; if (f->version < 3 || - (f->version == 4 && f->micro_version >= 10)) + (f->version == 4 && f->micro_version >= 10 && !f->bayer)) return AVERROR(ENOTSUP); /* Streams with a low amount of slices will usually be much slower @@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) /* Decode shaders */ RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode, - dctx, hwfc, sl, f->ac, is_rgb)); + dctx, hwfc, sl, f->ac, is_rgb, f->bayer)); /* Init static data */ RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f)); -- 2.52.0 From 7a96478f23f670cd117b04029fd3aebd93a4e65d Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Wed, 27 May 2026 04:21:53 +0900 Subject: [PATCH 7/8] vulkan_ffv1: detect float remap from pixfmt, not f->flt The decode-shader picker fell over for integer remapped streams. 
Sponsored-by: Sovereign Tech Fund --- libavcodec/vulkan_ffv1.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index e8dafb2505..ce2e392233 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -169,8 +169,9 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx, if (err < 0) return err; - /* Allocate slice offsets/status buffer */ - if (f->version >=4 && f->micro_version >= 9) { + /* Allocate slice offsets/status buffer (note, for integer+remap, we don't need it) */ + if (f->version >=4 && f->micro_version >= 9 && + (av_pix_fmt_desc_get(sw_format)->flags & AV_PIX_FMT_FLAG_FLOAT)) { err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_fltmap_pool, &fp->slice_fltmap_buf, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, @@ -528,7 +529,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) 1, 5, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - if (fltmap_buf && !f->bayer) + if (fltmap_buf) ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode, 1, 6, 0, fltmap_buf, @@ -716,8 +717,16 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; + /* Detect a float output from the pixfmt descriptor instead of f->flt: + * encoders predating the f->flt extradata fix never wrote it, so the + * parsed value is unreliable for some v4m4+ streams. 
The descriptor's + * FLOAT flag is set by the pixfmt selection logic and is accurate */ + int is_float = !!(av_pix_fmt_desc_get(out_frames_ctx->sw_format)->flags & + AV_PIX_FMT_FLAG_FLOAT); + + /* Bindings 5 (dst) and 6 (fltmap_buf) are conditional */ ff_vk_shader_add_descriptor_set(s, shd, desc_set, - 5 + rgb + (!bayer && f->micro_version >= 9), + 5 + rgb + (is_float && !bayer), 0, 0); if (bayer) { @@ -729,7 +738,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, ff_vk_shader_link(s, shd, ff_ffv1_dec_bayer_comp_spv_data, ff_ffv1_dec_bayer_comp_spv_len, "main"); - } else if (f->version >=4 && f->micro_version >= 9) { + } else if (is_float) { if (ac == AC_GOLOMB_RICE) ff_vk_shader_link(s, shd, ff_ffv1_dec_rgb_float_golomb_comp_spv_data, @@ -827,8 +836,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) FFVulkanDecodeShared *ctx = NULL; FFv1VulkanDecodeContext *fv; - if (f->version < 3 || - (f->version == 4 && f->micro_version >= 10 && !f->bayer)) + if (f->version < 3) return AVERROR(ENOTSUP); /* Streams with a low amount of slices will usually be much slower -- 2.52.0 From 438fe7d7d885c1e72a8690eaff2478b5a0ea9224 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 29 May 2026 04:11:45 +0900 Subject: [PATCH 8/8] vulkan_ffv1: add Bayer encoder --- libavcodec/ffv1enc_vulkan.c | 23 +++- libavcodec/vulkan/Makefile | 4 +- libavcodec/vulkan/ffv1_enc.comp.glsl | 118 +++++++++++++++--- libavcodec/vulkan/ffv1_enc_bayer.comp.glsl | 31 +++++ .../vulkan/ffv1_enc_bayer_golomb.comp.glsl | 32 +++++ libavcodec/vulkan/ffv1_enc_setup.comp.glsl | 14 ++- 6 files changed, 191 insertions(+), 31 deletions(-) create mode 100644 libavcodec/vulkan/ffv1_enc_bayer.comp.glsl create mode 100644 libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 7c22ced785..40e758f093 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -150,6 +150,12 @@ extern const 
unsigned int ff_ffv1_enc_rgb_float_golomb_comp_spv_len; extern const unsigned char ff_ffv1_enc_sort32_comp_spv_data[]; extern const unsigned int ff_ffv1_enc_sort32_comp_spv_len; +extern const unsigned char ff_ffv1_enc_bayer_comp_spv_data[]; +extern const unsigned int ff_ffv1_enc_bayer_comp_spv_len; + +extern const unsigned char ff_ffv1_enc_bayer_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_enc_bayer_golomb_comp_spv_len; + static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, AVFrame *enc_in, VkImageView *enc_in_views, FFVkBuffer *slice_data_buf, uint32_t slice_data_size, @@ -1210,7 +1216,16 @@ static int init_encode_shader(AVCodecContext *avctx, VkSpecializationInfo *sl) ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 4 + fv->is_rgb + !!f->remap_mode, 0, 0); - if (f->remap_mode) { + if (f->bayer) { + if (fv->ctx.ac == AC_GOLOMB_RICE) + ff_vk_shader_link(&fv->s, shd, + ff_ffv1_enc_bayer_golomb_comp_spv_data, + ff_ffv1_enc_bayer_golomb_comp_spv_len, "main"); + else + ff_vk_shader_link(&fv->s, shd, + ff_ffv1_enc_bayer_comp_spv_data, + ff_ffv1_enc_bayer_comp_spv_len, "main"); + } else if (f->remap_mode) { if (fv->ctx.ac == AC_GOLOMB_RICE) ff_vk_shader_link(&fv->s, shd, ff_ffv1_enc_rgb_float_golomb_comp_spv_data, @@ -1288,9 +1303,9 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) return AVERROR_INVALIDDATA; } - /* We target version 4.3 */ + /* We target version 4.3 by default */ if (f->version == 4) - f->micro_version = f->remap_mode ? 9 : 3; + f->micro_version = (f->remap_mode || f->bayer) ? 
9 : 3; f->num_h_slices = fv->num_h_slices; f->num_v_slices = fv->num_v_slices; @@ -1437,7 +1452,7 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) /* Init rct search shader */ fv->optimize_rct = fv->is_rgb && f->version >= 4 && - !fv->force_pcm && fv->optimize_rct; + !fv->force_pcm && fv->optimize_rct && !f->bayer; /* Init shader specialization consts */ SPEC_LIST_CREATE(sl, 19, 19*sizeof(uint32_t)) diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 0425548978..3c47cbd58b 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -15,7 +15,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/ffv1_enc_setup.comp.spv.o \ vulkan/ffv1_enc_remap.comp.spv.o \ vulkan/ffv1_enc_rgb_float.comp.spv.o \ vulkan/ffv1_enc_rgb_float_golomb.comp.spv.o \ - vulkan/ffv1_enc_sort32.comp.spv.o + vulkan/ffv1_enc_sort32.comp.spv.o \ + vulkan/ffv1_enc_bayer.comp.spv.o \ + vulkan/ffv1_enc_bayer_golomb.comp.spv.o OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \ vulkan/ffv1_dec_reset.comp.spv.o \ diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl b/libavcodec/vulkan/ffv1_enc.comp.glsl index 1c30e91828..a4a942782d 100644 --- a/libavcodec/vulkan/ffv1_enc.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc.comp.glsl @@ -79,6 +79,9 @@ void encode_line_pcm(in SliceContext sc, readonly uimage2D img, return; int w = sc.slice_dim.x; +#ifdef BAYER + w >>= 1; +#endif #ifndef RGB if (p > 0 && p < 3) { @@ -100,6 +103,9 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off, uint8_t quant_table_idx, in int run_index) { int w = sc.slice_dim.x; +#ifdef BAYER + w >>= 1; +#endif #ifndef RGB if (p > 0 && p < 3) { @@ -160,6 +166,9 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off, uint8_t quant_table_idx, inout int run_index) { int w = sc.slice_dim.x; +#ifdef BAYER + w >>= 1; +#endif #ifndef RGB if (p > 0 && p < 3) { @@ -301,6 +310,41 @@ void preload_rgb(uint slice_idx, in SliceContext sc, 
ivec2 sp, int w, int y, memoryBarrierImage(); barrier(); } + +#ifdef BAYER +void preload_bayer(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) +{ + int offset = rct_offset; + + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { + ivec2 lpos = sp + LADDR(ivec2(x, y)); + ivec2 src_pos = sc.slice_pos + ivec2(int(x) << 1, y << 1); + + int r = int(imageLoad(src[0], src_pos + ivec2(0, 0))[0]); + int gr = int(imageLoad(src[0], src_pos + ivec2(1, 0))[0]); + int gb = int(imageLoad(src[0], src_pos + ivec2(0, 1))[0]); + int b = int(imageLoad(src[0], src_pos + ivec2(1, 1))[0]); + + if (apply_rct) { + int gd = gr - gb; + int gm = gb + (gd >> 1); + b -= gm; + r -= gm; + gm += (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2; + b += offset; + r += offset; + gd += offset; + gr = gm; + gb = gd; + } + + imageStore(tmp, lpos, ivec4(gr, gb, b, r)); + } + + memoryBarrierImage(); + barrier(); +} +#endif #endif void encode_slice(in SliceContext sc, uint slice_idx) @@ -308,13 +352,39 @@ void encode_slice(in SliceContext sc, uint slice_idx) ivec2 sp = sc.slice_pos; u16vec4 bits = get_slice_bits(sc); -#ifdef RGB +#ifdef BAYER + int bayer_w = sc.slice_dim.x >> 1; + int bayer_h = sc.slice_dim.y >> 1; + sp.x >>= 1; + sp.y = int(gl_WorkGroupID.y)*rgb_linecache; + /* c_bits = bps + 1 for is_rgb pixfmts (Bayer is treated as RGB). gm uses + * raw bps; gd/b-gm/r-gm need an extra bit for the RCT difference. PCM + * stores raw samples so all planes use bps. 
*/ + if (sc.slice_coding_mode == 0) + bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits); + else + bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1); +#elif defined(RGB) sp.y = int(gl_WorkGroupID.y)*rgb_linecache; #endif #ifndef GOLOMB if (force_pcm) { -#ifndef RGB +#ifdef BAYER + for (int y = 0; y < bayer_h; y++) { + preload_bayer(sc, sp, bayer_w, y, false); + + for (uint c = 0; c < 4; c++) + encode_line_pcm(sc, tmp, sp, y, 0, c); + } +#elif defined(RGB) + for (int y = 0; y < sc.slice_dim.y; y++) { + preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, false); + + for (uint c = 0; c < color_planes; c++) + encode_line_pcm(sc, tmp, sp, y, 0, rgb_plane_order[c]); + } +#else for (int c = 0; c < color_planes; c++) { int h = sc.slice_dim.y; @@ -328,27 +398,45 @@ void encode_slice(in SliceContext sc, uint slice_idx) for (int y = 0; y < h; y++) encode_line_pcm(sc, src[p], sp, y, p, comp); } -#else - for (int y = 0; y < sc.slice_dim.y; y++) { - preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, false); - - for (uint c = 0; c < color_planes; c++) - encode_line_pcm(sc, tmp, sp, y, 0, rgb_plane_order[c]); - } #endif return; } #endif +#ifdef BAYER + u32vec4 slice_state_off = (slice_idx*codec_planes + + uvec4(0, 2, 1, 1))*plane_state_size; +#else u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; +#endif #ifdef GOLOMB slice_state_off >>= 3; init_golomb(); #endif -#ifndef RGB +#ifdef BAYER + int run_index = 0; + for (int y = 0; y < bayer_h; y++) { + preload_bayer(sc, sp, bayer_w, y, true); + + for (uint c = 0; c < 4; c++) + encode_line(sc, tmp, slice_state_off[c], + sp, y, 0, c, bits[c], + U8(context_model), run_index); + } +#elif defined(RGB) + int run_index = 0; + for (int y = 0; y < sc.slice_dim.y; y++) { + preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, true); + + for (uint c = 0; c < color_planes; c++) + encode_line(sc, tmp, slice_state_off[c], + sp, y, 0, rgb_plane_order[c], bits[c], + U8(context_model), run_index); + } 
+#else for (uint c = 0; c < color_planes; c++) { int run_index = 0; @@ -363,16 +451,6 @@ void encode_slice(in SliceContext sc, uint slice_idx) encode_line(sc, src[p], slice_state_off[c], sp, y, p, comp, bits[c], U8(context_model), run_index); } -#else - int run_index = 0; - for (int y = 0; y < sc.slice_dim.y; y++) { - preload_rgb(slice_idx, sc, sp, sc.slice_dim.x, y, true); - - for (uint c = 0; c < color_planes; c++) - encode_line(sc, tmp, slice_state_off[c], - sp, y, 0, rgb_plane_order[c], bits[c], - U8(context_model), run_index); - } #endif } diff --git a/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl b/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl new file mode 100644 index 0000000000..fc463795f2 --- /dev/null +++ b/libavcodec/vulkan/ffv1_enc_bayer.comp.glsl @@ -0,0 +1,31 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +/* Encoder shader variant built with RGB and BAYER defined and GOLOMB left undefined; all code comes from the included ffv1_enc.comp.glsl. */ +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require +/* Image binding named tmp: in the BAYER build preload_bayer() stores each preloaded row into it, and encode_slice() passes it to encode_line() and encode_line_pcm() as the image to encode from. */ +layout (set = 1, binding = 4) uniform uimage2D tmp; +/* BAYER selects the #ifdef BAYER branches of encode_slice(), which are tested before the plain RGB branches. NOTE(review): RGB is defined as well; preload_bayer() appears to sit inside an outer conditional section, presumably the RGB one; confirm. */ +#define RGB +#define BAYER +#include "ffv1_enc.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl new file mode 100644 index 0000000000..3c49fac3b7 --- /dev/null +++ b/libavcodec/vulkan/ffv1_enc_bayer_golomb.comp.glsl @@ -0,0 +1,32 @@ +/* + * FFv1 codec + * + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +/* Encoder shader variant built with RGB, BAYER and GOLOMB all defined; all code comes from the included ffv1_enc.comp.glsl. */ +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require +/* Image binding named tmp: in the BAYER build preload_bayer() stores each preloaded row into it, and encode_slice() passes it to encode_line() as the image to encode from. */ +layout (set = 1, binding = 4) uniform uimage2D tmp; +/* BAYER selects the #ifdef BAYER branches of encode_slice(). GOLOMB compiles out the force_pcm early-return path (it is guarded by #ifndef GOLOMB) and enables the slice_state_off >>= 3 and init_golomb() statements. NOTE(review): RGB is defined as well, presumably because the Bayer helpers live inside the RGB-only section; confirm. */ +#define RGB +#define BAYER +#define GOLOMB +#include "ffv1_enc.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl index e931019a43..de2d459536 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc_setup.comp.glsl @@ -252,17 +252,19 @@ void write_slice_header(uint slice_idx, inout SliceContext sc) if (version >= 4) { put_rac(rc_state[0], force_pcm); put_usymbol(uint(force_pcm), 0); - if (!force_pcm && colorspace == 1) { + if (!force_pcm && colorspace != 0) { put_usymbol(sc.slice_rct_coef.g, 0); put_usymbol(sc.slice_rct_coef.r, 0); } - if (remap_mode != 0) { + if (micro_version >= 4) { put_usymbol(remap_mode, 0); - if (c_bits >= 32) - encode_float32_remap(slice_idx, sc); - else - encode_histogram_remap(slice_idx, sc); + if (remap_mode != 0) { + if (c_bits >= 32) + encode_float32_remap(slice_idx, sc); + else + encode_histogram_remap(slice_idx, sc); + } } } } -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
