This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit c0a697a1bc02805a532bf5854478dde4a28c8d83 Author: Lynne <[email protected]> AuthorDate: Sun Feb 8 03:58:11 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Thu Feb 19 19:42:30 2026 +0100 vulkan_ffv1: use regular descriptors for slice state HUGE speedup on AMD, HUGE speedup everywhere. --- libavcodec/vulkan/ffv1_dec.comp.glsl | 30 +++++++++++------ libavcodec/vulkan/ffv1_dec_reset.comp.glsl | 40 ++++++++++++++--------- libavcodec/vulkan/ffv1_dec_rgb.comp.glsl | 2 +- libavcodec/vulkan/ffv1_enc_golomb.comp.glsl | 1 + libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl | 1 + libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl | 1 + libavcodec/vulkan/ffv1_vlc.glsl | 8 ++++- libavcodec/vulkan_ffv1.c | 28 +++++++++++++--- 8 files changed, 79 insertions(+), 32 deletions(-) diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl index 1f37c23b2a..720fa14cd2 100644 --- a/libavcodec/vulkan/ffv1_dec.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -33,10 +33,14 @@ layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf { layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf { uint32_t slice_status[]; }; -layout (set = 1, binding = 3) uniform uimage2D dec[]; +layout (set = 1, binding = 4) uniform uimage2D dec[]; #ifndef GOLOMB +layout (set = 1, binding = 3, scalar) buffer slice_state_buf { + uint8_t slice_rc_state[]; +}; + #define READ(c, idx) get_rac_noadapt(c, idx) int get_isymbol(inout RangeCoder c) { @@ -114,10 +118,9 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, quant_table_idx, extend_lookup[quant_table_idx]); - uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]); - u8buf cd = u8buf(uint64_t(slice_state) + context_off); + uint rc_off = state_off + CONTEXT_SIZE*abs(pr[0]) + gl_LocalInvocationID.x; - rc_state[gl_LocalInvocationID.x] = cd[gl_LocalInvocationID.x].v; + rc_state[gl_LocalInvocationID.x] = slice_rc_state[rc_off]; rc_dec[gl_LocalInvocationID.x] = false; barrier(); @@ -128,7 +131,8 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, barrier(); uint i = gl_LocalInvocationID.x; if (rc_dec[i]) - cd[i].v = zero_one_state[rc_state[i] + (rc_data[i] ? 256 : 0)]; + slice_rc_state[rc_off] = zero_one_state[rc_state[i] + + (rc_data[i] ? 256 : 0)]; if (gl_LocalInvocationID.x == 0) { if (pr[0] < 0) @@ -139,7 +143,13 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, } } } -#else + +#else /* GOLOMB */ + +layout (set = 1, binding = 3, scalar) buffer slice_state_buf { + VlcState slice_vlc_state[]; +}; + GetBitContext gb; void golomb_init(inout SliceContext sc) @@ -172,8 +182,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, quant_table_idx, extend_lookup[quant_table_idx]); - uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]); - VlcState sb = VlcState(uint64_t(slice_state) + context_off); + uint vlc_off = state_off + abs(pr[0]); if (pr[0] == 0 && run_mode == 0) run_mode = 1; @@ -201,14 +210,14 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, if (run_count < 0) { run_mode = 0; run_count = 0; - diff = read_vlc_symbol(gb, sb, bits); + diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits); if (diff >= 0) diff++; } else { diff = 0; } } else { - diff = read_vlc_symbol(gb, sb, bits); + diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits); } if (pr[0] < 0) @@ -298,6 +307,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx) uvec4(0, 1, 1, 2))*plane_state_size; #ifdef GOLOMB + slice_state_off >>= 3; // division by VLC_STATE_SIZE golomb_init(sc); #endif diff --git a/libavcodec/vulkan/ffv1_dec_reset.comp.glsl b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl index e708d03036..1aeb196e1e 100644 --- a/libavcodec/vulkan/ffv1_dec_reset.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_reset.comp.glsl @@ -26,6 +26,18 @@ #include "common.glsl" #include "ffv1_common.glsl" +#ifdef GOLOMB +#define PS_SHIFT 3 +layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf { + VlcState slice_vlc_state[]; +}; +#else +#define PS_SHIFT 2 +layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf { + uint32_t slice_rc_state[]; +}; +#endif + void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; @@ -34,30 +46,26 @@ void main(void) return; const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z]; + uint contexts = context_count[qidx]; - uint64_t slice_state_off = uint64_t(slice_state) + - slice_idx*plane_state_size*codec_planes; + uint plane_state_len = plane_state_size >> PS_SHIFT; + uint offs = slice_idx*plane_state_len*codec_planes + + gl_WorkGroupID.z*plane_state_len + + gl_LocalInvocationID.x; #ifdef GOLOMB - uint64_t start = slice_state_off + - (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + - gl_LocalInvocationID.x)*VLC_STATE_SIZE; for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) { - VlcState sb = VlcState(start); - sb.drift = int16_t(0); - sb.error_sum = uint16_t(4); - sb.bias = int8_t(0); - sb.count = uint8_t(1); - start += gl_WorkGroupSize.x*VLC_STATE_SIZE; + slice_vlc_state[offs].drift = int16_t(0); + slice_vlc_state[offs].error_sum = uint16_t(4); + slice_vlc_state[offs].bias = int8_t(0); + slice_vlc_state[offs].count = uint8_t(1); + offs += gl_WorkGroupSize.x; } #else - uint64_t start = slice_state_off + - gl_WorkGroupID.z*plane_state_size + - (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */ uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */); for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) { - u32buf(start).v = 0x80808080; - start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */); + slice_rc_state[offs] = 0x80808080; + offs += gl_WorkGroupSize.x; } #endif } diff --git a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl index fe0d6957df..72dc31ba15 100644 --- a/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec_rgb.comp.glsl @@ -24,7 +24,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_shader_image_load_formatted : require -layout (set = 1, binding = 4) writeonly uniform uimage2D dst[]; +layout (set = 1, binding = 5) writeonly uniform uimage2D dst[]; #define RGB #include "ffv1_dec.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl index a120564602..459c65d954 100644 --- a/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc_golomb.comp.glsl @@ -23,5 +23,6 @@ #pragma shader_stage(compute) #extension GL_GOOGLE_include_directive : require +#define VLC_BUFFER #define GOLOMB #include "ffv1_enc.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl index 277f88c6c3..23eca0c7ed 100644 --- a/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc_reset_golomb.comp.glsl @@ -23,5 +23,6 @@ #pragma shader_stage(compute) #extension GL_GOOGLE_include_directive : require +#define VLC_BUFFER #define GOLOMB #include "ffv1_enc_reset.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl b/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl index 8efffd19e8..c7a3d17fd5 100644 --- a/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc_rgb_golomb.comp.glsl @@ -23,5 +23,6 @@ #pragma shader_stage(compute) #extension GL_GOOGLE_include_directive : require +#define VLC_BUFFER #define GOLOMB #include "ffv1_enc_rgb.comp.glsl" diff --git a/libavcodec/vulkan/ffv1_vlc.glsl b/libavcodec/vulkan/ffv1_vlc.glsl index e1c6cf66de..68353ae9ce 100644 --- a/libavcodec/vulkan/ffv1_vlc.glsl +++ b/libavcodec/vulkan/ffv1_vlc.glsl @@ -24,7 +24,13 @@ #define VULKAN_FFV1_VLC_H #define VLC_STATE_SIZE 8 -layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState { +#ifdef VLC_BUFFER +layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer +#else +struct +#endif + +VlcState { uint32_t error_sum; int16_t drift; int8_t bias; diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 3b1bce97d1..0e2cda1028 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -404,6 +404,12 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) slice_state, 0, fp->slice_data_size*f->slice_count, VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader, + 1, 1, 0, + slice_state, + f->slice_count*fp->slice_data_size, + VK_WHOLE_SIZE, + VK_FORMAT_UNDEFINED); ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader); ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader, @@ -458,16 +464,22 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) slice_status, 0, 2*f->slice_count*sizeof(uint32_t), VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 3, 0, + slice_state, + f->slice_count*fp->slice_data_size, + VK_WHOLE_SIZE, + VK_FORMAT_UNDEFINED); ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, decode_dst, decode_dst_view, - 1, 3, + 1, 4, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); if (is_rgb) ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, f->picture.f, vp->view.out, - 1, 4, + 1, 5, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); @@ -602,8 +614,12 @@ static int init_reset_shader(FFV1Context *f, FFVulkanContext *s, .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, + { /* slice_state_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, }; - ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0); if (ac == AC_GOLOMB_RICE) RET(ff_vk_shader_link(s, shd, @@ -660,6 +676,10 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, + { /* slice_state_buf */ + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { /* dec */ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .stages = VK_SHADER_STAGE_COMPUTE_BIT, @@ -671,7 +691,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), }, }; - ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4 + rgb, 0, 0); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 5 + rgb, 0, 0); if (ac == AC_GOLOMB_RICE) { if (rgb) _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
