This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit c91634dfe6b850cc48ba4edf9df35a70e0f78781 Author: Lynne <[email protected]> AuthorDate: Tue Feb 17 01:23:57 2026 +0100 Commit: Lynne <[email protected]> CommitDate: Thu Feb 19 19:42:35 2026 +0100 vulkan/ffv1: add current linecache for encode/decode This avoids needing expensive roundtrips when reading/writing to images, mainly in the decoder. --- libavcodec/vulkan/ffv1_common.glsl | 37 ++++++++++++++++++++++-------------- libavcodec/vulkan/ffv1_dec.comp.glsl | 14 ++++++++------ libavcodec/vulkan/ffv1_enc.comp.glsl | 15 ++++++++++++--- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/libavcodec/vulkan/ffv1_common.glsl b/libavcodec/vulkan/ffv1_common.glsl index 5a8bb7dd4d..9ed71f7e03 100644 --- a/libavcodec/vulkan/ffv1_common.glsl +++ b/libavcodec/vulkan/ffv1_common.glsl @@ -150,6 +150,8 @@ const uint32_t log2_run[41] = { 24, }; +shared VTYPE2 linecache = {}; + #ifdef RGB #define RGB_LBUF (rgb_linecache - 1) #define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF))) @@ -167,18 +169,14 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */ - TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]); + TYPE cur = linecache[1]; int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; if (has_extend_lookup && extend_lookup) { - TYPE cur2 = TYPE(0); - if (expectEXT(off.x > 0, true)) { - ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0); - cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]); - } + TYPE cur2 = linecache[0]; base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; /* top-2 became current upon swap when rgb_linecache == 2 */ @@ -214,20 +212,14 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]); } - TYPE cur = TYPE(0); - if (off != ivec2(0, 0)) - cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]); + TYPE cur = linecache[1]; int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; if (has_extend_lookup && extend_lookup) { - TYPE cur2 = TYPE(0); - if (off.x > 0 && off != ivec2(1, 0)) { - ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); - cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]); - } + TYPE cur2 = linecache[0]; base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; TYPE top2 = TYPE(0); @@ -242,6 +234,23 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, #endif /* RGB */ +void linecache_load(readonly uimage2D src, ivec2 sp, int y, uint comp) +{ + if (y > 0) { + if (gl_LocalInvocationID.x == 0) { + TYPE c = TYPE(imageLoad(src, sp + LADDR(ivec2(0, y - 1)))[comp]); + linecache = VTYPE2(TYPE(0), c); + } + barrier(); + } +} + +void linecache_next(TYPE cur) +{ + linecache[0] = linecache[1]; + linecache[1] = cur; +} + #endif /* ENCODE || DECODE */ #endif /* VULKAN_FFV1_COMMON_H */ diff --git a/libavcodec/vulkan/ffv1_dec.comp.glsl b/libavcodec/vulkan/ffv1_dec.comp.glsl index bfc5bd18c2..1c9eb46a7a 100644 --- a/libavcodec/vulkan/ffv1_dec.comp.glsl +++ b/libavcodec/vulkan/ffv1_dec.comp.glsl @@ -111,8 +111,6 @@ void decode_line_pcm(ivec2 sp, int w, int y, int p) } } -shared ivec2 pr; - void decode_line(ivec2 sp, int w, int y, int p, uint state_off, uint8_t quant_table_idx, int run_index) @@ -124,11 +122,11 @@ void decode_line(ivec2 sp, int w, } #endif + linecache_load(dec[p], sp, y, 0); + for (int x = 0; x < w; x++) { - if (gl_LocalInvocationID.x == 0) - pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, - quant_table_idx, extend_lookup[quant_table_idx]); - barrier(); + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, + quant_table_idx, extend_lookup[quant_table_idx]); uint rc_off = state_off + CONTEXT_SIZE*abs(pr[0]) + gl_LocalInvocationID.x; @@ -143,6 +141,7 @@ void decode_line(ivec2 sp, int w, uint v = zero_extend(pr[1] + diff, bits); imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); + linecache_next(TYPE(v)); } /* Image write now visible to other invocs */ @@ -183,6 +182,8 @@ void decode_line(ivec2 sp, int w, } #endif + linecache_load(dec[p], sp, y, 0); + int run_count = 0; int run_mode = 0; @@ -235,6 +236,7 @@ void decode_line(ivec2 sp, int w, uint v = zero_extend(pr[1] + diff, bits); imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); + linecache_next(TYPE(v)); } } #endif diff --git a/libavcodec/vulkan/ffv1_enc.comp.glsl b/libavcodec/vulkan/ffv1_enc.comp.glsl index a56dca396f..7a212fd6bd 100644 --- a/libavcodec/vulkan/ffv1_enc.comp.glsl +++ b/libavcodec/vulkan/ffv1_enc.comp.glsl @@ -99,10 +99,13 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off, } #endif + linecache_load(img, sp, y, comp); + for (int x = 0; x < w; x++) { ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, quant_table_idx, extend_lookup[quant_table_idx]); - d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; + TYPE cur = TYPE(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]); + d[1] = int(cur) - d[1]; if (d[0] < 0) d = -d; @@ -114,8 +117,10 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off, rc_state[gl_LocalInvocationID.x] = slice_rc_state[rc_off]; barrier(); - if (gl_LocalInvocationID.x == 0) + if (gl_LocalInvocationID.x == 0) { put_symbol(d[1]); + linecache_next(cur); + } barrier(); slice_rc_state[rc_off] = rc_state[gl_LocalInvocationID.x]; @@ -151,13 +156,17 @@ void encode_line(in SliceContext sc, readonly uimage2D img, uint state_off, } #endif + linecache_load(img, sp, y, comp); + int run_count = 0; bool run_mode = false; for (int x = 0; x < w; x++) { ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, quant_table_idx, extend_lookup[quant_table_idx]); - d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; + TYPE cur = TYPE(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]); + d[1] = int(cur) - d[1]; + linecache_next(cur); if (d[0] < 0) d = -d; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
