This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 3dceda776989dfd600b931b8e06c91d66eca1d4f Author: Lynne <[email protected]> AuthorDate: Tue Feb 3 11:41:16 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Thu Feb 19 19:42:27 2026 +0100 vulkan_ffv1: convert to compile-time SPIR-V generation --- configure | 2 +- libavcodec/ffv1_vulkan.h | 16 + libavcodec/vulkan/Makefile | 11 +- .../vulkan/{ffv1_common.comp => ffv1_common.glsl} | 129 ++++- .../vulkan/{ffv1_dec.comp => ffv1_dec.comp.glsl} | 146 ++--- .../vulkan/ffv1_dec_golomb.comp.glsl | 11 +- .../{ffv1_reset.comp => ffv1_dec_reset.comp.glsl} | 12 +- .../ffv1_dec_reset_golomb.comp.glsl} | 11 +- .../vulkan/ffv1_dec_rgb.comp.glsl | 17 +- .../ffv1_dec_rgb_golomb.comp.glsl} | 11 +- ...fv1_dec_setup.comp => ffv1_dec_setup.comp.glsl} | 36 +- libavcodec/vulkan/rangecoder.comp | 14 +- libavcodec/vulkan_ffv1.c | 631 +++++++-------------- 13 files changed, 466 insertions(+), 581 deletions(-) diff --git a/configure b/configure index 7830991de2..8c18012f74 100755 --- a/configure +++ b/configure @@ -3354,7 +3354,7 @@ av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" dpx_vulkan_hwaccel_deps="vulkan spirv_compiler" dpx_vulkan_hwaccel_select="dpx_decoder" -ffv1_vulkan_hwaccel_deps="vulkan spirv_library" +ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h index 372478f4b7..a11c61ae4b 100644 --- a/libavcodec/ffv1_vulkan.h +++ b/libavcodec/ffv1_vulkan.h @@ -36,6 +36,22 @@ int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, FFVkBuffer *vkb, FFV1Context *f); +typedef struct FFv1ShaderParams { + VkDeviceAddress slice_data; + VkDeviceAddress slice_state; + + uint32_t extend_lookup[8]; + uint16_t context_count[8]; + + int fmt_lut[4]; + uint16_t img_size[2]; + + uint32_t plane_state_size; + uint32_t key_frame; + uint32_t crcref; + int micro_version; +} FFv1ShaderParams; + typedef struct FFv1VkRCTParameters { int fmt_lut[4]; int offset; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 860475d960..a9ff44c52d 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -7,10 +7,13 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/ffv1_enc_setup.o vulkan/ffv1_enc.o \ vulkan/ffv1_rct_search.o -OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ - vulkan/rangecoder.o vulkan/ffv1_vlc.o \ - vulkan/ffv1_common.o vulkan/ffv1_reset.o \ - vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \ + vulkan/ffv1_dec_reset.comp.spv.o \ + vulkan/ffv1_dec_reset_golomb.comp.spv.o \ + vulkan/ffv1_dec.comp.spv.o \ + vulkan/ffv1_dec_golomb.comp.spv.o \ + vulkan/ffv1_dec_rgb.comp.spv.o \ + vulkan/ffv1_dec_rgb_golomb.comp.spv.o OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \ vulkan/prores_raw_idct.comp.spv.o diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.glsl similarity index 72% copy from libavcodec/vulkan/ffv1_common.comp copy to libavcodec/vulkan/ffv1_common.glsl index 5f654e2b29..625e615054 100644 --- a/libavcodec/vulkan/ffv1_common.comp +++ b/libavcodec/vulkan/ffv1_common.glsl @@ -20,13 +20,65 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef VULKAN_FFV1_COMMON_H +#define VULKAN_FFV1_COMMON_H + +#include "rangecoder.comp" +#ifdef GOLOMB +#include "ffv1_vlc.comp" +#endif + +#define MAX_QUANT_TABLES 8 +#define MAX_CONTEXT_INPUTS 5 +#define MAX_QUANT_TABLE_SIZE 256 +#define MAX_QUANT_TABLE_MASK (MAX_QUANT_TABLE_SIZE - 1) + +layout (constant_id = 0) const int rgb_linecache = 2; +layout (constant_id = 1) const bool has_crc = false; +layout (constant_id = 2) const int version = 0; +layout (constant_id = 3) const int quant_table_count = 0; +layout (constant_id = 4) const bool has_extend_lookup = false; + +layout (constant_id = 5) const int rct_offset = 0; +layout (constant_id = 6) const int colorspace = 0; +layout (constant_id = 7) const bool transparency = false; +layout (constant_id = 8) const bool planar_rgb = false; +layout (constant_id = 9) const int codec_planes = 0; +layout (constant_id = 10) const int color_planes = 0; +layout (constant_id = 11) const int planes = 0; +layout (constant_id = 12) const int bits_per_raw_sample = 0; + +layout (constant_id = 13) const int chroma_shift_x = 0; +layout (constant_id = 14) const int chroma_shift_y = 0; +const ivec2 chroma_shift = ivec2(chroma_shift_x, chroma_shift_y); + +layout (push_constant, scalar) uniform pushConstants { + u8buf slice_data; + u8buf slice_state; + + bool extend_lookup[MAX_QUANT_TABLES]; + uint16_t context_count[MAX_QUANT_TABLES]; + + ivec4 fmt_lut; + u16vec2 img_size; + + uint plane_state_size; + bool key_frame; + uint32_t crcref; + int micro_version; +}; + +#define TYPE int32_t +#define VTYPE2 i32vec2 +#define VTYPE3 i32vec3 + struct SliceContext { RangeCoder c; -#if !defined(DECODE) - PutBitContext pb; /* 8*8 bytes */ -#else +#ifdef DECODE GetBitContext gb; +#else + PutBitContext pb; /* 8*8 bytes */ #endif ivec2 slice_dim; @@ -34,12 +86,37 @@ struct SliceContext { ivec2 slice_rct_coef; u8vec3 quant_table_idx; - uint hdr_len; // only used for golomb - uint slice_coding_mode; bool slice_reset_contexts; }; +layout (set = 1, binding = 0) buffer slice_ctx_buf { + SliceContext slice_ctx[]; +}; + +uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) +{ + uint mpw = 1 << chroma_shift; + uint awidth = align(width, mpw); + + if ((version < 4) || ((version == 4) && (micro_version < 3))) + return width * sx / num_h_slices; + + sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; + if (sx == awidth) + sx = width; + + return sx; +} + +#if defined(ENCODE) || defined(DECODE) + +layout (set = 0, binding = 1, scalar) readonly uniform quant_buf { + int16_t quant_table[MAX_QUANT_TABLES] + [MAX_CONTEXT_INPUTS] + [MAX_QUANT_TABLE_SIZE]; +}; + /* -1, { -1, 0 } */ int predict(int L, ivec2 top) { @@ -78,23 +155,8 @@ const uint32_t log2_run[41] = { 24, }; -uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) -{ - uint mpw = 1 << chroma_shift; - uint awidth = align(width, mpw); - - if ((version < 4) || ((version == 4) && (micro_version < 3))) - return width * sx / num_h_slices; - - sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; - if (sx == awidth) - sx = width; - - return sx; -} - #ifdef RGB -#define RGB_LBUF (RGB_LINECACHE - 1) +#define RGB_LBUF (rgb_linecache - 1) #define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF))) ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, @@ -116,7 +178,7 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - if (expectEXT(extend_lookup, false)) { + if (has_extend_lookup && extend_lookup) { TYPE cur2 = TYPE(0); if (expectEXT(off.x > 0, true)) { const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0); @@ -124,12 +186,12 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, } base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; -#if RGB_LINECACHE == 2 - /* top-2 became current upon swap */ - TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]); -#else - TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -2)))[comp]); -#endif + /* top-2 became current upon swap when rgb_linecache == 2 */ + ivec2 top2_off = off; + if (rgb_linecache != 2) + top2_off += ivec2(0, -2); + + TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(top2_off))[comp]); base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; } @@ -137,7 +199,7 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, return ivec2(base, predict(cur, VTYPE2(top))); } -#else /* RGB */ +#else #define LADDR(p) (p) @@ -165,7 +227,7 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - if (expectEXT(extend_lookup, false)) { + if (has_extend_lookup && extend_lookup) { TYPE cur2 = TYPE(0); if (off.x > 0 && off != ivec2(1, 0)) { const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); @@ -182,4 +244,9 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, /* context, prediction */ return ivec2(base, predict(cur, VTYPE2(top))); } -#endif + +#endif /* RGB */ + +#endif /* ENCODE || DECODE */ + +#endif /* VULKAN_FFV1_COMMON_H */ diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp.glsl similarity index 75% rename from libavcodec/vulkan/ffv1_dec.comp rename to libavcodec/vulkan/ffv1_dec.comp.glsl index 22a961a6df..0d7dec48a3 100644 --- a/libavcodec/vulkan/ffv1_dec.comp +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -20,14 +20,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define DECODE +#include "common.comp" +#include "ffv1_common.glsl" + +layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf { + u32vec2 slice_offsets[]; +}; +layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf { + uint32_t slice_status[]; +}; +layout (set = 1, binding = 3) uniform uimage2D dec[]; + #ifndef GOLOMB -#ifdef CACHED_SYMBOL_READER -shared uint8_t state[CONTEXT_SIZE]; -#define READ(c, off) get_rac_direct(c, state[off]) -#else #define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off)) -#endif - int get_isymbol(inout RangeCoder c, uint state_off) { if (READ(c, 0)) @@ -56,11 +65,6 @@ int get_isymbol(inout RangeCoder c, uint state_off) void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits) { -#ifdef CACHED_SYMBOL_READER - if (gl_LocalInvocationID.x > 0) - return; -#endif - #ifndef RGB if (p > 0 && p < 3) { w = ceil_rshift(w, chroma_shift.x); @@ -79,7 +83,7 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int b void decode_line(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits, uint state_off, - uint8_t quant_table_idx, const int run_index) + uint8_t quant_table_idx, int run_index) { #ifndef RGB if (p > 0 && p < 3) { @@ -90,34 +94,28 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, for (int x = 0; x < w; x++) { ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); + quant_table_idx, extend_lookup[quant_table_idx]); uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]); -#ifdef CACHED_SYMBOL_READER - u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x); - state[gl_LocalInvocationID.x] = sb.v; - barrier(); - if (gl_LocalInvocationID.x == 0) { - -#endif - - int diff = get_isymbol(sc.c, context_off); - if (pr[0] < 0) - diff = -diff; - uint v = zero_extend(pr[1] + diff, bits); - imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); - -#ifdef CACHED_SYMBOL_READER - } + int diff = get_isymbol(sc.c, context_off); + if (pr[0] < 0) + diff = -diff; - barrier(); - sb.v = state[gl_LocalInvocationID.x]; -#endif + uint v = zero_extend(pr[1] + diff, bits); + imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); } } +#else +void golomb_init(inout SliceContext sc) +{ + if (version == 3 && micro_version > 1 || version > 3) + get_rac_internal(sc.c, sc.c.range * 129 >> 8); -#else /* GOLOMB */ + uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; + init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count), + int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count)); +} void decode_line(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits, uint state_off, @@ -137,7 +135,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, ivec2 pos = sp + ivec2(x, y); int diff; ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); + quant_table_idx, extend_lookup[quant_table_idx]); uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]); VlcState sb = VlcState(uint64_t(slice_state) + context_off); @@ -209,7 +207,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) pix.r = int(imageLoad(dec[2], lpos)[0]); pix.g = int(imageLoad(dec[0], lpos)[0]); pix.b = int(imageLoad(dec[1], lpos)[0]); - if (transparency != 0) + if (transparency) pix.a = int(imageLoad(dec[3], lpos)[0]); if (expectEXT(apply_rct, true)) @@ -219,7 +217,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) pix[fmt_lut[2]], pix[fmt_lut[3]]); imageStore(dst[0], pos, pix); - if (planar_rgb != 0) { + if (planar_rgb) { for (int i = 1; i < color_planes; i++) imageStore(dst[i], pos, ivec4(pix[i])); } @@ -232,71 +230,73 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) int w = sc.slice_dim.x; ivec2 sp = sc.slice_pos; -#ifndef RGB int bits = bits_per_raw_sample; -#else - int bits = 9; +#ifdef RGB + bits = 9; if (bits != 8 || sc.slice_coding_mode != 0) bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); - sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE; + sp.y = int(gl_WorkGroupID.y)*rgb_linecache; #endif - /* PCM coding */ #ifndef GOLOMB + /* PCM coding */ if (sc.slice_coding_mode == 1) { -#ifndef RGB - for (int p = 0; p < planes; p++) { - int h = sc.slice_dim.y; - if (p > 0 && p < 3) - h = ceil_rshift(h, chroma_shift.y); - - for (int y = 0; y < h; y++) - decode_line_pcm(sc, sp, w, y, p, bits); - } -#else +#ifdef RGB for (int y = 0; y < sc.slice_dim.y; y++) { for (int p = 0; p < color_planes; p++) decode_line_pcm(sc, sp, w, y, p, bits); writeout_rgb(sc, sp, w, y, false); } -#endif - } else - - /* Arithmetic coding */ -#endif - { - u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; - u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; - -#ifndef RGB +#else for (int p = 0; p < planes; p++) { int h = sc.slice_dim.y; if (p > 0 && p < 3) h = ceil_rshift(h, chroma_shift.y); - int run_index = 0; for (int y = 0; y < h; y++) - decode_line(sc, sp, w, y, p, bits, - slice_state_off[p], quant_table_idx[p], run_index); + decode_line_pcm(sc, sp, w, y, p, bits); } -#else - int run_index = 0; - for (int y = 0; y < sc.slice_dim.y; y++) { - for (int p = 0; p < color_planes; p++) - decode_line(sc, sp, w, y, p, bits, - slice_state_off[p], quant_table_idx[p], run_index); +#endif + return; + } +#endif - writeout_rgb(sc, sp, w, y, true); - } + u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; + u32vec4 slice_state_off = (slice_idx*codec_planes + + uvec4(0, 1, 1, 2))*plane_state_size; + +#ifdef GOLOMB + golomb_init(sc); #endif + +#ifdef RGB + int run_index = 0; + for (int y = 0; y < sc.slice_dim.y; y++) { + for (int p = 0; p < color_planes; p++) + decode_line(sc, sp, w, y, p, bits, + slice_state_off[p], quant_table_idx[p], run_index); + + writeout_rgb(sc, sp, w, y, true); } +#else + for (int p = 0; p < planes; p++) { + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h = ceil_rshift(h, chroma_shift.y); + + int run_index = 0; + for (int y = 0; y < h; y++) + decode_line(sc, sp, w, y, p, bits, + slice_state_off[p], quant_table_idx[p], run_index); + } +#endif } void main(void) { - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; decode_slice(slice_ctx[slice_idx], slice_idx); uint32_t status = corrupt ? uint32_t(corrupt) : overread; diff --git a/libavdevice/ccfifo.c b/libavcodec/vulkan/ffv1_dec_golomb.comp.glsl similarity index 80% copy from libavdevice/ccfifo.c copy to libavcodec/vulkan/ffv1_dec_golomb.comp.glsl index 9007094f0b..4de62a4888 100644 --- a/libavdevice/ccfifo.c +++ b/libavcodec/vulkan/ffv1_dec_golomb.comp.glsl @@ -1,8 +1,7 @@ /* - * CEA-708 Closed Captioning FIFO - * Copyright (c) 2023 LTN Global Communications + * FFv1 codec * - * Author: Devin Heitmueller <[email protected]> + * Copyright (c) 2026 Lynne <[email protected]> * * This file is part of FFmpeg. * @@ -21,4 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavfilter/ccfifo.c" +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define GOLOMB +#include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl similarity index 88% copy from libavcodec/vulkan/ffv1_reset.comp copy to libavcodec/vulkan/ffv1_dec_reset.comp.glsl index cfb7dcc444..e157923d0a 100644 --- a/libavcodec/vulkan/ffv1_reset.comp +++ b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl @@ -20,12 +20,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#include "common.comp" +#include "ffv1_common.glsl" + void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - if (key_frame == 0 && - slice_ctx[slice_idx].slice_reset_contexts == false) + if (!key_frame && !slice_ctx[slice_idx].slice_reset_contexts) return; const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z]; @@ -35,7 +40,8 @@ void main(void) #ifdef GOLOMB uint64_t start = slice_state_off + - (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + gl_LocalInvocationID.x)*VLC_STATE_SIZE; + (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + + gl_LocalInvocationID.x)*VLC_STATE_SIZE; for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) { VlcState sb = VlcState(start); sb.drift = int16_t(0); diff --git a/libavcodec/aacps_fixed_tablegen.c b/libavcodec/vulkan/ffv1_dec_reset_golomb.comp.glsl similarity index 80% copy from libavcodec/aacps_fixed_tablegen.c copy to libavcodec/vulkan/ffv1_dec_reset_golomb.comp.glsl index 9e306991f0..8d7ff27230 100644 --- a/libavcodec/aacps_fixed_tablegen.c +++ b/libavcodec/vulkan/ffv1_dec_reset_golomb.comp.glsl @@ -1,7 +1,7 @@ /* - * Generate a header file for hardcoded Parametric Stereo tables + * FFv1 codec * - * Copyright (c) 2010 Alex Converse <[email protected]> + * Copyright (c) 2026 Lynne <[email protected]> * * This file is part of FFmpeg. * @@ -20,5 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#define USE_FIXED 1 -#include "aacps_tablegen_template.c" +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define GOLOMB +#include "ffv1_dec_reset.comp.glsl" diff --git a/compat/android/binder.h b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl similarity index 72% copy from compat/android/binder.h copy to libavcodec/vulkan/ffv1_dec_rgb.comp.glsl index 2b1ca53fe8..fe0d6957df 100644 --- a/compat/android/binder.h +++ b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl @@ -1,7 +1,7 @@ /* - * Android Binder handler + * FFv1 codec * - * Copyright (c) 2025 Dmitrii Okunev + * Copyright (c) 2026 Lynne <[email protected]> * * This file is part of FFmpeg. * @@ -20,12 +20,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef COMPAT_ANDROID_BINDER_H -#define COMPAT_ANDROID_BINDER_H +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_shader_image_load_formatted : require -/** - * Initialize Android Binder thread pool. - */ -void android_binder_threadpool_init_if_required(void); +layout (set = 1, binding = 4) writeonly uniform uimage2D dst[]; -#endif // COMPAT_ANDROID_BINDER_H +#define RGB +#include "ffv1_dec.comp.glsl" diff --git a/libavcodec/aacps_fixed_tablegen.c b/libavcodec/vulkan/ffv1_dec_rgb_golomb.comp.glsl similarity index 80% copy from libavcodec/aacps_fixed_tablegen.c copy to libavcodec/vulkan/ffv1_dec_rgb_golomb.comp.glsl index 9e306991f0..8b9aadbd59 100644 --- a/libavcodec/aacps_fixed_tablegen.c +++ b/libavcodec/vulkan/ffv1_dec_rgb_golomb.comp.glsl @@ -1,7 +1,7 @@ /* - * Generate a header file for hardcoded Parametric Stereo tables + * FFv1 codec * - * Copyright (c) 2010 Alex Converse <[email protected]> + * Copyright (c) 2026 Lynne <[email protected]> * * This file is part of FFmpeg. * @@ -20,5 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#define USE_FIXED 1 -#include "aacps_tablegen_template.c" +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#define GOLOMB +#include "ffv1_dec_rgb.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl similarity index 85% rename from libavcodec/vulkan/ffv1_dec_setup.comp rename to libavcodec/vulkan/ffv1_dec_setup.comp.glsl index fd297cb70a..4607a8105a 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp +++ b/libavcodec/vulkan/ffv1_dec_setup.comp.glsl @@ -20,6 +20,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma shader_stage(compute) +#extension GL_GOOGLE_include_directive : require + +#include "common.comp" +#include "ffv1_common.glsl" + +layout (set = 0, binding = 1, scalar) uniform crc_ieee_buf { + uint32_t crc_ieee[256]; +}; + +layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf { + u32vec2 slice_offsets[]; +}; +layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf { + uint32_t slice_status[]; +}; + uint8_t setup_state[CONTEXT_SIZE]; uint get_usymbol(inout RangeCoder c) @@ -98,21 +115,9 @@ bool decode_slice_header(inout SliceContext sc) return false; } -void golomb_init(inout SliceContext sc) -{ - if (version == 3 && micro_version > 1 || version > 3) { - setup_state[0] = uint8_t(129); - get_rac_direct(sc.c, setup_state[0]); - } - - uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; - init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count), - int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count)); -} - void main(void) { - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; u8buf bs = u8buf(slice_data + slice_offsets[slice_idx].x); uint32_t slice_size = slice_offsets[slice_idx].y; @@ -125,10 +130,7 @@ void main(void) decode_slice_header(slice_ctx[slice_idx]); - if (golomb == 1) - golomb_init(slice_ctx[slice_idx]); - - if (ec != 0 && check_crc != 0) { + if (has_crc) { uint32_t crc = crcref; for (int i = 0; i < slice_size; i++) crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 98ff743b2e..7dffaf6c70 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -20,6 +20,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef CONTEXT_SIZE +#define CONTEXT_SIZE 32 + +layout (set = 0, binding = 0, scalar) uniform rangecoder_buf { + uint8_t zero_one_state[512]; +}; +#endif + struct RangeCoder { uint64_t bytestream_start; uint64_t bytestream; @@ -42,8 +50,6 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size) r.outstanding_byte = uint8_t(0xFF); } -#if !defined(DECODE) - #ifdef FULL_RENORM /* Full renorm version that can handle outstanding_byte == 0xFF */ void renorm_encoder(inout RangeCoder c) @@ -178,8 +184,6 @@ uint32_t rac_terminate(inout RangeCoder c) return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); } -#else - /* Decoder */ uint overread = 0; bool corrupt = false; @@ -243,5 +247,3 @@ bool get_rac_equi(inout RangeCoder c) { return get_rac_internal(c, c.range >> 1); } - -#endif diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 7766d67511..044d95ceed 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -23,18 +23,30 @@ #include "ffv1.h" #include "ffv1_vulkan.h" -#include "libavutil/vulkan_spirv.h" #include "libavutil/mem.h" #define RGB_LINECACHE 2 -extern const char *ff_source_common_comp; -extern const char *ff_source_rangecoder_comp; -extern const char *ff_source_ffv1_vlc_comp; -extern const char *ff_source_ffv1_common_comp; -extern const char *ff_source_ffv1_dec_setup_comp; -extern const char *ff_source_ffv1_reset_comp; -extern const char *ff_source_ffv1_dec_comp; +extern const unsigned char ff_ffv1_dec_setup_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_setup_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_reset_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_reset_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_reset_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_reset_golomb_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_rgb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_rgb_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_golomb_comp_spv_len; + +extern const unsigned char ff_ffv1_dec_rgb_golomb_comp_spv_data[]; +extern const unsigned int ff_ffv1_dec_rgb_golomb_comp_spv_len; const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { .codec_id = AV_CODEC_ID_FFV1, @@ -64,80 +76,15 @@ typedef struct FFv1VulkanDecodeContext { FFVulkanShader reset; FFVulkanShader decode; - FFVkBuffer rangecoder_static_buf; + FFVkBuffer rangecoder_buf; FFVkBuffer quant_buf; - FFVkBuffer crc_tab_buf; + FFVkBuffer crc_buf; AVBufferPool *slice_state_pool; AVBufferPool *slice_offset_pool; AVBufferPool *slice_status_pool; } FFv1VulkanDecodeContext; -typedef struct FFv1VkParameters { - VkDeviceAddress slice_data; - VkDeviceAddress slice_state; - - int fmt_lut[4]; - uint32_t img_size[2]; - uint32_t chroma_shift[2]; - - uint32_t plane_state_size; - uint32_t crcref; - int rct_offset; - - uint8_t extend_lookup[8]; - uint8_t bits_per_raw_sample; - uint8_t quant_table_count; - uint8_t version; - uint8_t micro_version; - uint8_t key_frame; - uint8_t planes; - uint8_t codec_planes; - uint8_t color_planes; - uint8_t transparency; - uint8_t planar_rgb; - uint8_t colorspace; - uint8_t ec; - uint8_t golomb; - uint8_t check_crc; - uint8_t padding[3]; -} FFv1VkParameters; - -static void add_push_data(FFVulkanShader *shd) -{ - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_data; ); - GLSLC(1, u8buf slice_state; ); - GLSLC(0, ); - GLSLC(1, ivec4 fmt_lut; ); - GLSLC(1, uvec2 img_size; ); - GLSLC(1, uvec2 chroma_shift; ); - GLSLC(0, ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint32_t crcref; ); - GLSLC(1, int rct_offset; ); - GLSLC(0, ); - GLSLC(1, uint8_t extend_lookup[8]; ); - GLSLC(1, uint8_t bits_per_raw_sample; ); - GLSLC(1, uint8_t quant_table_count; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t planes; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t color_planes; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t colorspace; ); - GLSLC(1, uint8_t ec; ); - GLSLC(1, uint8_t golomb; ); - GLSLC(1, uint8_t check_crc; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), - VK_SHADER_STAGE_COMPUTE_BIT); -} - static int vk_ffv1_start_frame(AVCodecContext *avctx, const AVBufferRef *buffer_ref, av_unused const uint8_t *buffer, @@ -291,13 +238,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) FFV1Context *f = avctx->priv_data; FFv1VulkanDecodeContext *fv = ctx->sd_ctx; - FFv1VkParameters pd; - FFv1VkResetParameters pd_reset; AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; enum AVPixelFormat sw_format = hwfc->sw_format; - int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && !(sw_format == AV_PIX_FMT_YA8); int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components; @@ -408,39 +352,25 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) VK_FORMAT_UNDEFINED); ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); - pd = (FFv1VkParameters) { + + FFv1ShaderParams pd = { .slice_data = slices_buf->address, .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, .img_size[0] = f->picture.f->width, .img_size[1] = f->picture.f->height, - .chroma_shift[0] = f->chroma_h_shift, - .chroma_shift[1] = f->chroma_v_shift, .plane_state_size = fp->plane_state_size, + .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, .crcref = f->crcref, - .rct_offset = 1 << bits, - - .bits_per_raw_sample = bits, - .quant_table_count = f->quant_table_count, - .version = f->version, .micro_version = f->micro_version, - .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, - .planes = av_pix_fmt_count_planes(sw_format), - .codec_planes = f->plane_count, - .color_planes = color_planes, - .transparency = f->transparency, - .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) && - (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1), - .colorspace = f->colorspace, - .ec = f->ec, - .golomb = f->ac == AC_GOLOMB_RICE, - .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK), }; - for (int i = 0; i < f->quant_table_count; i++) - pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || - (f->quant_tables[i][4][127] != 0); + for (int i = 0; i < f->quant_table_count; i++) { + pd.context_count[i] = f->context_count[i]; + pd.extend_lookup[i] = f->quant_tables[i][3][127] || + f->quant_tables[i][4][127]; + } /* For some reason the C FFv1 encoder/decoder treats these differently */ if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 || @@ -451,7 +381,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); + 0, sizeof(FFv1ShaderParams), &pd); vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); @@ -476,21 +406,9 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) VK_FORMAT_UNDEFINED); ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader); - - pd_reset = (FFv1VkResetParameters) { - .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, - .plane_state_size = fp->plane_state_size, - .codec_planes = f->plane_count, - .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, - .version = f->version, - .micro_version = f->micro_version, - }; - for (int i = 0; i < f->quant_table_count; i++) - pd_reset.context_count[i] = f->context_count[i]; - ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd_reset), &pd_reset); + 0, sizeof(FFv1ShaderParams), &pd); /* Sync between setup and reset shaders */ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state, @@ -530,27 +448,33 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) slice_state, 0, fp->slice_data_size*f->slice_count, VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, - decode_dst, decode_dst_view, - 1, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 1, 0, + slice_offset, + 0, 2*f->slice_count*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, 1, 2, 0, slice_status, 0, 2*f->slice_count*sizeof(uint32_t), VK_FORMAT_UNDEFINED); + + ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + decode_dst, decode_dst_view, + 1, 3, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); if (is_rgb) ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, f->picture.f, vp->view.out, - 1, 3, + 1, 4, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); + 0, sizeof(FFv1ShaderParams), &pd); /* Sync probabilities between reset and decode shaders */ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state, @@ -602,329 +526,175 @@ fail: return 0; } -static void define_shared_code(FFVulkanShader *shd, int use32bit) +static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVulkanShader *shd, + VkSpecializationInfo *sl) { - int smp_bits = use32bit ? 32 : 16; - - GLSLC(0, #define DECODE ); + int err; - av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); - av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { 1, 1, 1 }, 0); - GLSLF(0, #define TYPE int%i_t ,smp_bits); - GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); - GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_rangecoder_comp); - GLSLD(ff_source_ffv1_common_comp); -} + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams), + VK_SHADER_STAGE_COMPUTE_BIT); -static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd) -{ - int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 1, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", + const FFVulkanDescriptorSetBinding desc_set_const[] = { + { /* rangecoder_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "crc_ieee_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint32_t crc_ieee[256];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + { /* crc_ieee_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; + ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0); - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0)); - - define_shared_code(shd, 0 /* Irrelevant */); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, + const FFVulkanDescriptorSetBinding desc_set[] = { + { /* slice_data_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "slice_offsets_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "u32vec2 slice_offsets", - .buf_elems = 2*f->max_slice_count, + { /* slice_offsets_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "slice_status_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint32_t slice_status", - .buf_elems = 2*f->max_slice_count, + { /* slice_status_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0); - GLSLD(ff_source_ffv1_dec_setup_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_ffv1_dec_setup_comp_spv_data, + ff_ffv1_dec_setup_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } static int init_reset_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int ac) + FFVkExecPool *pool, FFVulkanShader *shd, + VkSpecializationInfo *sl, int ac) { int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024); - RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - wg_dim, 1, 1, - 0)); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { wg_dim, 1, 1 }, 0); - if (ac == AC_GOLOMB_RICE) - av_bprintf(&shd->src, "#define GOLOMB\n"); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams), VK_SHADER_STAGE_COMPUTE_BIT); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + const FFVulkanDescriptorSetBinding desc_set_const[] = { + { /* rangecoder_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); + ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 1, 1, 0); - define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */); - if (ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_vlc_comp); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, + const FFVulkanDescriptorSetBinding desc_set[] = { + { /* slice_data_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0); - GLSLD(ff_source_ffv1_reset_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + if (ac == AC_GOLOMB_RICE) + RET(ff_vk_shader_link(s, shd, + ff_ffv1_dec_reset_golomb_comp_spv_data, + ff_ffv1_dec_reset_golomb_comp_spv_len, "main")); + else + RET(ff_vk_shader_link(s, shd, + ff_ffv1_dec_reset_comp_spv_data, + ff_ffv1_dec_reset_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, + FFVkExecPool *pool, FFVulkanShader *shd, AVHWFramesContext *dec_frames_ctx, AVHWFramesContext *out_frames_ctx, - int ac, int rgb) + VkSpecializationInfo *sl, int ac, int rgb) { int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - int use_cached_reader = ac != AC_GOLOMB_RICE && - s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV; - - RET(ff_vk_shader_init(s, shd, "ffv1_dec", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - use_cached_reader ? CONTEXT_SIZE : 1, 1, 1, - 0)); - - if (ac == AC_GOLOMB_RICE) - av_bprintf(&shd->src, "#define GOLOMB\n"); - - if (rgb) - av_bprintf(&shd->src, "#define RGB\n"); - - if (use_cached_reader) - av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); - /* Common codec header */ - GLSLD(ff_source_common_comp); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, + (uint32_t []) { 1, 1, 1 }, 0); - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams), + VK_SHADER_STAGE_COMPUTE_BIT); - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", + const FFVulkanDescriptorSetBinding desc_set_const[] = { + { /* rangecoder_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + { /* quant_buf */ + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; + ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0); - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); - - define_shared_code(shd, f->use32bit); - if (ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_vlc_comp); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, + const FFVulkanDescriptorSetBinding desc_set[] = { + { /* slice_data_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { /* slice_offsets_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "dec", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + { /* slice_status_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, - { - .name = "slice_status_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint32_t slice_status", - .buf_elems = 2*f->max_slice_count, + { /* dec */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), }, - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = "writeonly", - .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + { /* dst */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0)); - - GLSLD(ff_source_ffv1_dec_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4 + rgb, 0, 0); + + if (ac == AC_GOLOMB_RICE) { + if (rgb) + ff_vk_shader_link(s, shd, + ff_ffv1_dec_rgb_golomb_comp_spv_data, + ff_ffv1_dec_rgb_golomb_comp_spv_len, "main"); + else + ff_vk_shader_link(s, shd, + ff_ffv1_dec_golomb_comp_spv_data, + ff_ffv1_dec_golomb_comp_spv_len, "main"); + } else { + if (rgb) + ff_vk_shader_link(s, shd, + ff_ffv1_dec_rgb_comp_spv_data, + ff_ffv1_dec_rgb_comp_spv_len, "main"); + else + ff_vk_shader_link(s, shd, + ff_ffv1_dec_comp_spv_data, + ff_ffv1_dec_comp_spv_len, "main"); + } RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -954,7 +724,8 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s, err = av_hwframe_ctx_init(*dst); if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", + av_log(avctx, AV_LOG_ERROR, + "Unable to initialize frame pool with format %s: %s\n", av_get_pix_fmt_name(sw_format), av_err2str(err)); av_buffer_unref(dst); return err; @@ -973,9 +744,9 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx) ff_vk_shader_free(&ctx->s, &fv->reset); ff_vk_shader_free(&ctx->s, &fv->decode); + ff_vk_free_buf(&ctx->s, &fv->rangecoder_buf); ff_vk_free_buf(&ctx->s, &fv->quant_buf); - ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf); - ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf); + ff_vk_free_buf(&ctx->s, &fv->crc_buf); av_buffer_pool_uninit(&fv->slice_state_pool); av_buffer_pool_uninit(&fv->slice_offset_pool); @@ -991,18 +762,11 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; FFVulkanDecodeShared *ctx = NULL; FFv1VulkanDecodeContext *fv; - FFVkSPIRVCompiler *spv; if (f->version < 3 || (f->version == 4 && f->micro_version > 3)) return AVERROR(ENOTSUP); - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - err = ff_vk_decode_init(avctx); if (err < 0) return err; @@ -1019,6 +783,8 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; AVHWFramesContext *dctx = hwfc; enum AVPixelFormat sw_format = hwfc->sw_format; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sw_format); + int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components; int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && !(sw_format == AV_PIX_FMT_YA8); @@ -1029,63 +795,78 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx) dctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; } + SPEC_LIST_CREATE(sl, 15, 15*sizeof(uint32_t)) + + if (RGB_LINECACHE != 2) + SPEC_LIST_ADD(sl, 0, 32, RGB_LINECACHE); + + if (f->ec && !!(avctx->err_recognition & AV_EF_CRCCHECK)) + SPEC_LIST_ADD(sl, 1, 32, 1); + + SPEC_LIST_ADD(sl, 2, 32, f->version); + SPEC_LIST_ADD(sl, 3, 32, f->quant_table_count); + + for (int i = 0; i < f->quant_table_count; i++) { + if (f->quant_tables[i][3][127] || f->quant_tables[i][4][127]) { + SPEC_LIST_ADD(sl, 4, 32, 1); + break; + } + } + + int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; + SPEC_LIST_ADD(sl, 5, 32, 1 << bits); + SPEC_LIST_ADD(sl, 6, 32, f->colorspace); + SPEC_LIST_ADD(sl, 7, 32, f->transparency); + SPEC_LIST_ADD(sl, 8, 32, ff_vk_mt_is_np_rgb(sw_format) && + (desc->flags & AV_PIX_FMT_FLAG_PLANAR)); + SPEC_LIST_ADD(sl, 9, 32, f->plane_count); + SPEC_LIST_ADD(sl, 10, 32, color_planes); + SPEC_LIST_ADD(sl, 11, 32, av_pix_fmt_count_planes(sw_format)); + SPEC_LIST_ADD(sl, 12, 32, bits); + + SPEC_LIST_ADD(sl, 13, 32, f->chroma_h_shift); + SPEC_LIST_ADD(sl, 14, 32, f->chroma_v_shift); + /* Setup shader */ - RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup)); + RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, &fv->setup, sl)); /* Reset shader */ - RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, - spv, &fv->reset, f->ac)); + RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, &fv->reset, sl, f->ac)); /* Decode shaders */ - RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, - spv, &fv->decode, - dctx, - hwfc, - f->ac, - is_rgb)); - - /* Range coder data */ - RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, - &fv->rangecoder_static_buf, - f)); - - /* Quantization table data */ - RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, - &fv->quant_buf, - f)); - - /* CRC table buffer */ - RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, - &fv->crc_tab_buf, - f)); + RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode, + dctx, hwfc, sl, f->ac, is_rgb)); + + /* Init static data */ + RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, &fv->rangecoder_buf, f)); + RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, &fv->crc_buf, f)); + RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, &fv->quant_buf, f)); /* Update setup global descriptors */ RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], &fv->setup, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, + &fv->rangecoder_buf, + 0, 512*sizeof(uint8_t), VK_FORMAT_UNDEFINED)); RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], &fv->setup, 0, 1, 0, - &fv->crc_tab_buf, - 0, fv->crc_tab_buf.size, + &fv->crc_buf, + 0, 256*sizeof(uint32_t), VK_FORMAT_UNDEFINED)); /* Update decode global descriptors */ RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], &fv->decode, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, + &fv->rangecoder_buf, + 0, 512*sizeof(uint8_t), VK_FORMAT_UNDEFINED)); RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], &fv->decode, 0, 1, 0, &fv->quant_buf, - 0, fv->quant_buf.size, + 0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED)); fail: - spv->uninit(&spv); - return err; } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
