This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 713f191c241a0b5c3b7a96a3d0efe3ad7faa9e3a Author: Lynne <[email protected]> AuthorDate: Wed May 27 04:02:37 2026 +0900 Commit: Lynne <[email protected]> CommitDate: Wed Jun 3 14:12:50 2026 +0900 vulkan_ffv1: add Bayer decoder Sponsored-by: Sovereign Tech Fund --- libavcodec/ffv1_vulkan.c | 9 +++ libavcodec/vulkan/Makefile | 4 +- libavcodec/vulkan/ffv1_common.glsl | 2 + libavcodec/vulkan/ffv1_dec.comp.glsl | 76 +++++++++++++++++++++- ..._dec_rgb.comp.glsl => ffv1_dec_bayer.comp.glsl} | 1 + ...b.comp.glsl => ffv1_dec_bayer_golomb.comp.glsl} | 2 + libavcodec/vulkan/ffv1_dec_setup.comp.glsl | 2 +- libavcodec/vulkan/ffv1_vlc.glsl | 2 +- libavcodec/vulkan_ffv1.c | 33 ++++++++-- 9 files changed, 118 insertions(+), 13 deletions(-) diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c index 73c2b2a7ce..81843b1701 100644 --- a/libavcodec/ffv1_vulkan.c +++ b/libavcodec/ffv1_vulkan.c @@ -41,6 +41,15 @@ void ff_ffv1_vk_set_common_sl(AVCodecContext *avctx, FFV1Context *f, } int bits = desc->comp[0].depth; + /* Bayer pixfmts report misleading per-component depth in comp[0].depth + * (it counts the fraction of bits each component contributes per output + * pixel, not the per-sample bit width). Use bits_per_raw_sample. The + * encoder fills f->bits_per_raw_sample directly; the decoder only + * fills f->avctx->bits_per_raw_sample. Prefer the FFV1Context field + * with the avctx field as a fallback so this works from both sides. */ + if (f->bayer) + bits = f->bits_per_raw_sample ? f->bits_per_raw_sample + : f->avctx->bits_per_raw_sample; SPEC_LIST_ADD(sl, 5, 32, (uint32_t)(1ULL << bits)); SPEC_LIST_ADD(sl, 6, 32, f->colorspace); SPEC_LIST_ADD(sl, 7, 32, f->transparency); diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index f86931727d..0425548978 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -25,7 +25,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \ vulkan/ffv1_dec_rgb.comp.spv.o \ vulkan/ffv1_dec_rgb_golomb.comp.spv.o \ vulkan/ffv1_dec_rgb_float.comp.spv.o \ - vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o + vulkan/ffv1_dec_rgb_float_golomb.comp.spv.o \ + vulkan/ffv1_dec_bayer.comp.spv.o \ + vulkan/ffv1_dec_bayer_golomb.comp.spv.o OBJS-$(CONFIG_PRORES_KS_VULKAN_ENCODER) += vulkan/prores_ks_alpha_data.comp.spv.o \ vulkan/prores_ks_slice_data.comp.spv.o \ diff --git a/libavcodec/vulkan/ffv1_common.glsl b/libavcodec/vulkan/ffv1_common.glsl index 3d3b6753c6..36bce88a4a 100644 --- a/libavcodec/vulkan/ffv1_common.glsl +++ b/libavcodec/vulkan/ffv1_common.glsl @@ -122,6 +122,8 @@ layout (set = 1, binding = 0, scalar) SB_QUALI buffer slice_ctx_buf { uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) { uint mpw = 1 << chroma_shift; + if (colorspace == 2) + mpw = max(mpw, 2u); uint awidth = align(width, mpw); if ((version < 4) || ((version == 4) && (micro_version < 3))) diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl index 82835e8f92..9ad2ec2442 100644 --- a/libavcodec/vulkan/ffv1_dec.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -247,6 +247,43 @@ void decode_line(ivec2 sp, int w, } #endif +#ifdef BAYER +void writeout_bayer(uint slice_idx, in SliceContext sc, ivec2 sp, int w, int y) +{ + memoryBarrierImage(); + barrier(); + + int offset = rct_offset; + + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { + ivec2 lpos = sp + LADDR(ivec2(x, y)); + ivec2 pos = sc.slice_pos + ivec2(int(x) << 1, y << 1); + + int g_r = int(imageLoad(dec[0], lpos)[0]); + int g_b = int(imageLoad(dec[1], lpos)[0]); + int b = int(imageLoad(dec[2], lpos)[0]); + int r = int(imageLoad(dec[3], lpos)[0]); + + if (sc.slice_coding_mode != 1) { + b -= offset; + r -= offset; + g_r -= (b*sc.slice_rct_coef.g + r*sc.slice_rct_coef.r) >> 2; + b += g_r; + r += g_r; + + int gd = g_b - offset; + g_b = g_r - (gd >> 1); + g_r = g_b + gd; + } + + imageStore(dst[0], pos + ivec2(0, 0), uvec4(r)); + imageStore(dst[0], pos + ivec2(1, 0), uvec4(g_r)); + imageStore(dst[0], pos + ivec2(0, 1), uvec4(g_b)); + imageStore(dst[0], pos + ivec2(1, 1), uvec4(b)); + } +} +#endif + #ifdef RGB ivec4 transform_sample(ivec4 pix, ivec2 rct_coef, int offset) { @@ -319,14 +356,32 @@ void decode_slice(in SliceContext sc, uint slice_idx) ivec2 sp = sc.slice_pos; u16vec4 bits = get_slice_bits(sc); -#ifdef RGB +#ifdef BAYER + /* Bayer logical dims: 2x2 blocks at half resolution */ + w >>= 1; + int bayer_h = sc.slice_dim.y >> 1; + sp.x >>= 1; + sp.y = int(gl_WorkGroupID.y)*rgb_linecache; + /* c_bits = bps + 1 (the +1 is for is_rgb). For PCM mode, all planes use + * raw bps. For non-PCM, gm uses bps; gd/b-gm/r-gm use bps+1. */ + if (sc.slice_coding_mode == 0) + bits = u16vec4(c_bits - 1, c_bits, c_bits, c_bits); + else + bits = u16vec4(c_bits - 1, c_bits - 1, c_bits - 1, c_bits - 1); +#elif defined(RGB) sp.y = int(gl_WorkGroupID.y)*rgb_linecache; #endif #ifndef GOLOMB /* PCM coding */ if (sc.slice_coding_mode == 1) { -#ifdef RGB +#ifdef BAYER + for (int y = 0; y < bayer_h; y++) { + for (int p = 0; p < 4; p++) + decode_line_pcm(sp, w, y, p); + writeout_bayer(slice_idx, sc, sp, w, y); + } +#elif defined(RGB) for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) decode_line_pcm(sp, w, y, p); @@ -347,16 +402,31 @@ void decode_slice(in SliceContext sc, uint slice_idx) } #endif +#ifdef BAYER + u8vec4 quant_table_idx = sc.quant_table_idx.xzyy; + u32vec4 slice_state_off = (slice_idx*codec_planes + + uvec4(0, 2, 1, 1))*plane_state_size; +#else u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; +#endif #ifdef GOLOMB slice_state_off >>= 3; // division by VLC_STATE_SIZE golomb_init(); #endif -#ifdef RGB +#ifdef BAYER + int run_index = 0; + for (int y = 0; y < bayer_h; y++) { + for (int p = 0; p < 4; p++) + decode_line(sp, w, y, p, bits[p], + slice_state_off[p], quant_table_idx[p], run_index); + + writeout_bayer(slice_idx, sc, sp, w, y); + } +#elif defined(RGB) int run_index = 0; for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl similarity index 98% copy from libavcodec/vulkan/ffv1_dec_rgb.comp.glsl copy to libavcodec/vulkan/ffv1_dec_bayer.comp.glsl index 72dc31ba15..6ceb15a35a 100644 --- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_bayer.comp.glsl @@ -27,4 +27,5 @@ layout (set = 1, binding = 5) writeonly uniform uimage2D dst[]; #define RGB +#define BAYER #include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl similarity index 97% copy from libavcodec/vulkan/ffv1_dec_rgb.comp.glsl copy to libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl index 72dc31ba15..ca8b7bada0 100644 --- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_bayer_golomb.comp.glsl @@ -27,4 +27,6 @@ layout (set = 1, binding = 5) writeonly uniform uimage2D dst[]; #define RGB +#define BAYER +#define GOLOMB #include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl index ff57c57dc3..d000116012 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl @@ -191,7 +191,7 @@ bool decode_slice_header(uint slice_idx, inout SliceContext sc) if (version >= 4) { sc.slice_reset_contexts = get_rac(rc_state[0]); sc.slice_coding_mode = get_usymbol(0); - if (sc.slice_coding_mode != 1 && colorspace == 1) { + if (sc.slice_coding_mode != 1 && colorspace != 0) { sc.slice_rct_coef.g = int(get_usymbol(0)); sc.slice_rct_coef.r = int(get_usymbol(0)); if (sc.slice_rct_coef.g + sc.slice_rct_coef.r > 4) diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl index f362d3afbb..51f3d7ddae 100644 --- a/libavcodec/vulkan/ffv1_vlc.glsl +++ b/libavcodec/vulkan/ffv1_vlc.glsl @@ -35,7 +35,7 @@ void update_vlc_state(inout VlcState state, in int v) int drift = state.drift; int count = state.count; int bias = state.bias; - state.error_sum += uint16_t(abs(v)); + state.error_sum += uint32_t(abs(v)); drift += v; if (count == 128) { // FIXME: variable diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 4056f3958a..e8dafb2505 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -54,6 +54,12 @@ extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len; extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[]; extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len; +extern const unsigned char ff_ffv1_dec_bayer_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_bayer_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_bayer_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_bayer_golomb_comp_spv_len; + const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { .codec_id = AV_CODEC_ID_FFV1, .queue_flags = VK_QUEUE_COMPUTE_BIT, @@ -393,7 +399,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) nb_img_bar = 0; nb_buf_bar = 0; - for (int i = 0; i < color_planes; i++) + /* The intermediate frame has 4 planes (GBRAP16/32). Clear all of + * them since the bayer decoder uses all four. */ + int n_dec_planes = f->bayer ? 4 : color_planes; + for (int i = 0; i < n_dec_planes; i++) vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL, &((VkClearColorValue) { 0 }), 1, &((VkImageSubresourceRange) { @@ -519,7 +528,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) 1, 5, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - if (fltmap_buf) + if (fltmap_buf && !f->bayer) ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode, 1, 6, 0, fltmap_buf, @@ -651,7 +660,8 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd, AVHWFramesContext *dec_frames_ctx, AVHWFramesContext *out_frames_ctx, - VkSpecializationInfo *sl, int ac, int rgb) + VkSpecializationInfo *sl, int ac, int rgb, + int bayer) { int err; @@ -707,10 +717,19 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, }, }; ff_vk_shader_add_descriptor_set(s, shd, desc_set, - 5 + rgb + (f->micro_version >= 9), + 5 + rgb + (!bayer && f->micro_version >= 9), 0, 0); - if (f->version >=4 && f->micro_version >= 9) { + if (bayer) { + if (ac == AC_GOLOMB_RICE) + ff_vk_shader_link(s, shd, + ff_ffv1_dec_bayer_golomb_comp_spv_data, + ff_ffv1_dec_bayer_golomb_comp_spv_len, "main"); + else + ff_vk_shader_link(s, shd, + ff_ffv1_dec_bayer_comp_spv_data, + ff_ffv1_dec_bayer_comp_spv_len, "main"); + } else if (f->version >=4 && f->micro_version >= 9) { if (ac == AC_GOLOMB_RICE) ff_vk_shader_link(s, shd, ff_ffv1_dec_rgb_float_golomb_comp_spv_data, @@ -809,7 +828,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) FFv1VulkanDecodeContext *fv; if (f->version < 3 || - (f->version == 4 && f->micro_version >= 10)) + (f->version == 4 && f->micro_version >= 10 && !f->bayer)) return AVERROR(ENOTSUP); /* Streams with a low amount of slices will usually be much slower @@ -861,7 +880,7 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) /* Decode shaders */ RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode, - dctx, hwfc, sl, f->ac, is_rgb)); + dctx, hwfc, sl, f->ac, is_rgb, f->bayer)); /* Init static data */ RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f)); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
