The branch, master has been updated
via 1d1643b42a62af07fcb77ad6c9992dd41f4e04af (commit)
via fd2fd3828c1e7384d06afee7481d61865ab6cfa7 (commit)
via ef7354d471c18ec5c998220eaa4a95bdd36ccb6a (commit)
from 2456a39581c3388bcf6f61c66aef36a887537c23 (commit)
- Log -----------------------------------------------------------------
commit 1d1643b42a62af07fcb77ad6c9992dd41f4e04af
Author: averne <[email protected]>
AuthorDate: Sat Nov 29 22:33:45 2025 +0100
Commit: averne <[email protected]>
CommitDate: Sun Nov 30 22:01:17 2025 +0100
vulkan/prores: use cached bitstream reader
Speedup is around 75% on NVIDIA 3050, 20% on AMD 6700XT, 5% on Intel
TigerLake.
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 0c704c3d1c..90b8610817 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -21,7 +21,6 @@
#include "hwaccel_internal.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
-#include "libavutil/vulkan_loader.h"
#include "libavutil/vulkan_spirv.h"
extern const char *ff_source_common_comp;
@@ -207,14 +206,12 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
pr->frame));
+ /* Transfer ownership to the exec context */
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
vp->slices_buf = NULL;
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->metadata_buf, 1, 0));
pp->metadata_buf = NULL;
- /* Transfer ownership to the exec context */
- vp->slices_buf = pp->metadata_buf = NULL;
-
/* Input barrier */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
@@ -404,6 +401,11 @@ static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
local_size >> 16 & 0xff, local_size >> 8 & 0xff,
local_size >> 0 & 0xff,
0));
+ av_bprintf(&shd->src, "#define GET_BITS_SMEM\n");
+
+ if (interlaced)
+ av_bprintf(&shd->src, "#define INTERLACED\n");
+
/* Common code */
GLSLD(ff_source_common_comp);
@@ -412,9 +414,6 @@ static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
- if (interlaced)
- av_bprintf(&shd->src, "#define INTERLACED\n");
-
/* Main code */
GLSLD(source);
@@ -494,6 +493,7 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &pv->reset,
"prores_dec_reset", "main", desc_set, 1,
ff_source_prores_reset_comp, 0x080801, pr->frame_type != 0));
+
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "slice_offsets_buf",
commit fd2fd3828c1e7384d06afee7481d61865ab6cfa7
Author: averne <[email protected]>
AuthorDate: Sun Nov 30 13:25:37 2025 +0100
Commit: averne <[email protected]>
CommitDate: Sun Nov 30 19:21:08 2025 +0100
libavcodec/vulkan: remove unnecessary member in GetBitContext
The number of remaining bits can be calculated using existing state.
This simplifies calculations and frees up one register.
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index 4b71dfd2f4..f5f466ce31 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -201,7 +201,6 @@ struct GetBitContext {
uint64_t bits;
int bits_valid;
- int size_in_bits;
#ifdef GET_BITS_SMEM
int cur_smem_pos;
#endif
@@ -265,7 +264,6 @@ void init_get_bits(inout GetBitContext gb, u8buf data, int len)
{
gb.buf = gb.buf_start = uint64_t(data);
gb.buf_end = uint64_t(data) + len;
- gb.size_in_bits = len * 8;
/* Preload */
LOAD64()
@@ -320,5 +318,5 @@ int tell_bits(in GetBitContext gb)
int left_bits(in GetBitContext gb)
{
- return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
+ return int(gb.buf_end - gb.buf) * 8 + gb.bits_valid;
}
commit ef7354d471c18ec5c998220eaa4a95bdd36ccb6a
Author: averne <[email protected]>
AuthorDate: Sat Nov 29 22:33:26 2025 +0100
Commit: averne <[email protected]>
CommitDate: Sun Nov 30 19:21:04 2025 +0100
libavcodec/vulkan: introduce cached bitstream reader
This stores a small buffer in shared memory per decode thread (16 bytes),
which helps reduce the number of memory accesses.
The bitstream buffer is first aligned to a 4 byte boundary, so that the
buffer can be filled with a single memory request.
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index eda92ce28d..4b71dfd2f4 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -42,6 +42,10 @@ layout(buffer_reference, buffer_reference_align = 4) buffer u32vec2buf {
u32vec2 v;
};
+layout(buffer_reference, buffer_reference_align = 4) buffer u32vec4buf {
+ u32vec4 v;
+};
+
layout(buffer_reference, buffer_reference_align = 8) buffer u64buf {
uint64_t v;
};
@@ -198,8 +202,12 @@ struct GetBitContext {
uint64_t bits;
int bits_valid;
int size_in_bits;
+#ifdef GET_BITS_SMEM
+ int cur_smem_pos;
+#endif
};
+#ifndef GET_BITS_SMEM
#define LOAD64() \
{ \
u8vec4buf ptr = u8vec4buf(gb.buf); \
@@ -218,6 +226,40 @@ struct GetBitContext {
gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; \
gb.bits_valid += 32; \
}
+#else /* GET_BITS_SMEM */
+shared u32vec4 gb_storage[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z];
+
+#define FILL_SMEM() \
+ { \
+ u32vec4buf ptr = u32vec4buf(gb.buf); \
+ gb_storage[gl_LocalInvocationIndex] = ptr[0].v; \
+ gb.cur_smem_pos = 0; \
+ }
+
+#define LOAD64() \
+ { \
+ gb.bits = 0; \
+ gb.bits_valid = 0; \
+ u8buf ptr = u8buf(gb.buf); \
+ for (uint i = 0; i < ((4 - uint(gb.buf_start)) & 3); ++i) { \
+ gb.bits |= uint64_t(ptr[i].v) << (56 - i * 8); \
+ gb.bits_valid += 8; \
+ gb.buf += 1; \
+ } \
+ FILL_SMEM(); \
+ }
+
+#define RELOAD32() \
+ { \
+ if (gb.cur_smem_pos >= 4) \
+ FILL_SMEM(); \
+ uint v = gb_storage[gl_LocalInvocationIndex][gb.cur_smem_pos]; \
+ gb.buf += 4; \
+ gb.bits = uint64_t(reverse4(v)) << (32 - gb.bits_valid) | gb.bits; \
+ gb.bits_valid += 32; \
+ gb.cur_smem_pos += 1; \
+ }
+#endif /* GET_BITS_SMEM */
void init_get_bits(inout GetBitContext gb, u8buf data, int len)
{
-----------------------------------------------------------------------
Summary of changes:
libavcodec/vulkan/common.comp | 46 ++++++++++++++++++++++++++++++++++++++++---
libavcodec/vulkan_prores.c | 14 ++++++-------
2 files changed, 50 insertions(+), 10 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]