PR #21356 opened by Lynne URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21356 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21356.patch
glslang is slow, difficult to link against, inefficient, awful, and not really thread-safe. The same goes for shaderc, except that shaderc is also dead-ish. This makes GLSL be treated like any other language that gets compiled at build time, like CUDA and Metal. >From b344e28b3bfc1ec934fced636d00d2ea0725e276 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 03:15:40 +0100 Subject: [PATCH 01/12] vulkan: switch to static allocation for temporary descriptor data Simplifies management, and the hardware is limited to 4 descriptor sets and whatever bindings. --- libavutil/vulkan.c | 125 +++++++++------------------------------------ libavutil/vulkan.h | 27 ++++++---- 2 files changed, 40 insertions(+), 112 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 33d7e8aace..85501c5bbd 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -333,13 +333,9 @@ void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool, s->hwctx->alloc); - av_freep(&sd->desc_set_buf); - av_freep(&sd->desc_bind); av_freep(&sd->desc_sets); } - av_freep(&pool->reg_shd); - for (int i = 0; i < pool->pool_size; i++) { if (pool->cmd_buf_pools[i]) vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pools[i], @@ -1492,21 +1488,11 @@ int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage) { - VkPushConstantRange *pc; - - shd->push_consts = av_realloc_array(shd->push_consts, - sizeof(*shd->push_consts), - shd->push_consts_num + 1); - if (!shd->push_consts) - return AVERROR(ENOMEM); - - pc = &shd->push_consts[shd->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - + VkPushConstantRange *pc = &shd->push_consts[shd->push_consts_num++]; + av_assert1(shd->push_consts_num < FF_VK_MAX_PUSH_CONSTS); pc->stageFlags = stage; pc->offset = offset; pc->size = size; - return 0; } @@ -2329,11 +2315,6 @@ static int
init_descriptors(FFVulkanContext *s, FFVulkanShader *shd) VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - shd->desc_layout = av_malloc_array(shd->nb_descriptor_sets, - sizeof(*shd->desc_layout)); - if (!shd->desc_layout) - return AVERROR(ENOMEM); - if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { int has_singular = 0; int max_descriptors = 0; @@ -2405,11 +2386,6 @@ int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, return err; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets, - sizeof(*shd->bound_buffer_indices)); - if (!shd->bound_buffer_indices) - return AVERROR(ENOMEM); - for (int i = 0; i < shd->nb_descriptor_sets; i++) shd->bound_buffer_indices[i] = i; } @@ -2469,31 +2445,13 @@ int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, int singular, int print_to_shader_only) { int has_sampler = 0; - FFVulkanDescriptorSet *set; if (print_to_shader_only) goto print; - /* Actual layout allocated for the pipeline */ - set = av_realloc_array(shd->desc_set, - sizeof(*shd->desc_set), - shd->nb_descriptor_sets + 1); - if (!set) - return AVERROR(ENOMEM); - shd->desc_set = set; - - set = &set[shd->nb_descriptor_sets]; - memset(set, 0, sizeof(*set)); - - set->binding = av_calloc(nb, sizeof(*set->binding)); - if (!set->binding) - return AVERROR(ENOMEM); - - set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); - if (!set->binding_offset) { - av_freep(&set->binding); - return AVERROR(ENOMEM); - } + FFVulkanDescriptorSet *set = &shd->desc_set[shd->nb_descriptor_sets++]; + av_assert1(shd->nb_descriptor_sets < FF_VK_MAX_DESCRIPTOR_SETS); + av_assert1(nb < FF_VK_MAX_DESCRIPTOR_BINDINGS); for (int i = 0; i < nb; i++) { set->binding[i].binding = i; @@ -2515,20 +2473,12 @@ int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { for (int i = 0; i < nb; i++) { int j; - VkDescriptorPoolSize 
*desc_pool_size; for (j = 0; j < shd->nb_desc_pool_size; j++) if (shd->desc_pool_size[j].type == desc[i].type) break; if (j >= shd->nb_desc_pool_size) { - desc_pool_size = av_realloc_array(shd->desc_pool_size, - sizeof(*desc_pool_size), - shd->nb_desc_pool_size + 1); - if (!desc_pool_size) - return AVERROR(ENOMEM); - - shd->desc_pool_size = desc_pool_size; shd->nb_desc_pool_size++; - memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize)); + av_assert1(shd->nb_desc_pool_size < FF_VK_MAX_DESCRIPTOR_TYPES); } shd->desc_pool_size[j].type = desc[i].type; shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1); @@ -2537,7 +2487,6 @@ int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, set->singular = singular; set->nb_bindings = nb; - shd->nb_descriptor_sets++; print: /* Write shader info */ @@ -2604,33 +2553,17 @@ int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd) { int err; - FFVulkanShaderData *sd; if (!shd->nb_descriptor_sets) return 0; - sd = av_realloc_array(pool->reg_shd, - sizeof(*pool->reg_shd), - pool->nb_reg_shd + 1); - if (!sd) - return AVERROR(ENOMEM); - - pool->reg_shd = sd; - sd = &sd[pool->nb_reg_shd++]; - memset(sd, 0, sizeof(*sd)); + FFVulkanShaderData *sd = &pool->reg_shd[pool->nb_reg_shd++]; + av_assert1(pool->nb_reg_shd < FF_VK_MAX_SHADERS); sd->shd = shd; sd->nb_descriptor_sets = shd->nb_descriptor_sets; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind)); - if (!sd->desc_bind) - return AVERROR(ENOMEM); - - sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf)); - if (!sd->desc_set_buf) - return AVERROR(ENOMEM); - for (int i = 0; i < sd->nb_descriptor_sets; i++) { FFVulkanDescriptorSet *set = &shd->desc_set[i]; FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i]; @@ -2717,8 +2650,8 @@ int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, return 0; 
} -static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e, - FFVulkanShader *shd) +static inline const FFVulkanShaderData *get_shd_data(FFVkExecContext *e, + FFVulkanShader *shd) { for (int i = 0; i < e->parent->nb_reg_shd; i++) if (e->parent->reg_shd[i].shd == shd) @@ -2734,7 +2667,7 @@ static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, { FFVulkanFunctions *vk = &s->vkfn; FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - FFVulkanShaderData *sd = get_shd_data(e, shd); + const FFVulkanShaderData *sd = get_shd_data(e, shd); const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx; void *desc = sd->desc_set_buf[set].desc_mem + /* Base */ @@ -2751,7 +2684,7 @@ static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e, { FFVulkanFunctions *vk = &s->vkfn; FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - FFVulkanShaderData *sd = get_shd_data(e, shd); + const FFVulkanShaderData *sd = get_shd_data(e, shd); if (desc_set->singular) { for (int i = 0; i < e->parent->pool_size; i++) { @@ -2931,7 +2864,7 @@ void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, { FFVulkanFunctions *vk = &s->vkfn; VkDeviceSize offsets[1024]; - FFVulkanShaderData *sd = get_shd_data(e, shd); + const FFVulkanShaderData *sd = get_shd_data(e, shd); if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { VkShaderStageFlagBits stages = shd->stage; @@ -2943,12 +2876,15 @@ void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, if (sd && sd->nb_descriptor_sets) { if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { for (int i = 0; i < sd->nb_descriptor_sets; i++) - offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx; + offsets[i] = shd->desc_set[i].singular ? 
+ 0 : shd->desc_set[i].aligned_size*e->idx; /* Bind descriptor buffers */ - vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind); + vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, + sd->desc_bind); /* Binding offsets */ - vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout, + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, + shd->pipeline_layout, 0, sd->nb_descriptor_sets, shd->bound_buffer_indices, offsets); } else if (!shd->use_push) { @@ -2980,25 +2916,10 @@ void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd) vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout, s->hwctx->alloc); - for (int i = 0; i < shd->nb_descriptor_sets; i++) { - FFVulkanDescriptorSet *set = &shd->desc_set[i]; - av_free(set->binding); - av_free(set->binding_offset); - } - - if (shd->desc_layout) { - for (int i = 0; i < shd->nb_descriptor_sets; i++) - if (shd->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i], - s->hwctx->alloc); - } - - av_freep(&shd->desc_pool_size); - av_freep(&shd->desc_layout); - av_freep(&shd->desc_set); - av_freep(&shd->bound_buffer_indices); - av_freep(&shd->push_consts); - shd->push_consts_num = 0; + for (int i = 0; i < shd->nb_descriptor_sets; i++) + if (shd->desc_layout[i]) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i], + s->hwctx->alloc); } void ff_vk_uninit(FFVulkanContext *s) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index cde2876e46..7c513fb8e2 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -71,6 +71,12 @@ #define DUP_SAMPLER(x) { x, x, x, x } +#define FF_VK_MAX_DESCRIPTOR_SETS 4 +#define FF_VK_MAX_DESCRIPTOR_BINDINGS 16 +#define FF_VK_MAX_DESCRIPTOR_TYPES 16 +#define FF_VK_MAX_PUSH_CONSTS 4 +#define FF_VK_MAX_SHADERS 16 + typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; @@ -175,8 +181,9 @@ typedef struct FFVulkanDescriptorSet 
{ VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ VkBufferUsageFlags usage; - VkDescriptorSetLayoutBinding *binding; - VkDeviceSize *binding_offset; + VkDescriptorSetLayoutBinding binding[FF_VK_MAX_DESCRIPTOR_BINDINGS]; + VkDeviceSize binding_offset[FF_VK_MAX_DESCRIPTOR_BINDINGS]; + int nb_bindings; /* Descriptor set is shared between all submissions */ @@ -208,20 +215,20 @@ typedef struct FFVulkanShader { VkPipelineLayout pipeline_layout; /* Push consts */ - VkPushConstantRange *push_consts; + VkPushConstantRange push_consts[FF_VK_MAX_PUSH_CONSTS]; int push_consts_num; /* Descriptor sets */ - FFVulkanDescriptorSet *desc_set; + FFVulkanDescriptorSet desc_set[FF_VK_MAX_DESCRIPTOR_SETS]; int nb_descriptor_sets; /* Descriptor buffer */ - VkDescriptorSetLayout *desc_layout; - uint32_t *bound_buffer_indices; + VkDescriptorSetLayout desc_layout[FF_VK_MAX_DESCRIPTOR_SETS]; + uint32_t bound_buffer_indices[FF_VK_MAX_DESCRIPTOR_SETS]; /* Descriptor pool */ int use_push; - VkDescriptorPoolSize *desc_pool_size; + VkDescriptorPoolSize desc_pool_size[FF_VK_MAX_DESCRIPTOR_TYPES]; int nb_desc_pool_size; } FFVulkanShader; @@ -237,8 +244,8 @@ typedef struct FFVulkanShaderData { int nb_descriptor_sets; /* Descriptor buffer */ - FFVulkanDescriptorSetData *desc_set_buf; - VkDescriptorBufferBindingInfoEXT *desc_bind; + FFVulkanDescriptorSetData desc_set_buf[FF_VK_MAX_DESCRIPTOR_SETS]; + VkDescriptorBufferBindingInfoEXT desc_bind[FF_VK_MAX_DESCRIPTOR_SETS]; /* Descriptor pools */ VkDescriptorSet *desc_sets; @@ -263,7 +270,7 @@ typedef struct FFVkExecPool { size_t qd_size; /* Registered shaders' data */ - FFVulkanShaderData *reg_shd; + FFVulkanShaderData reg_shd[FF_VK_MAX_SHADERS]; int nb_reg_shd; } FFVkExecPool; -- 2.49.1 >From 76730425be88a70027bb61f7f1aa16d44b4adab7 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 12:30:54 +0100 Subject: [PATCH 02/12] vulkan_filter: don't use lg_size[2] as the number of workgroup.z This was put there 
because who knows why, but it's wrong, this ends up having double dispatches in the z direction if non-1. --- libavfilter/vulkan_filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index e049efec03..44a4ce7242 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -304,7 +304,7 @@ int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, vk->CmdDispatch(exec->buf, FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); + 1); return ff_vk_exec_submit(vkctx, exec); fail: @@ -395,7 +395,7 @@ int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, vk->CmdDispatch(exec->buf, FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); + 1); } return ff_vk_exec_submit(vkctx, exec); @@ -474,7 +474,7 @@ int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, vk->CmdDispatch(exec->buf, FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); + 1); return ff_vk_exec_submit(vkctx, exec); fail: -- 2.49.1 >From 181aaa780a29275ea20c22e807f2d1cdea0f737c Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 09:25:30 +0100 Subject: [PATCH 03/12] configure: rename spirv_compiler to spirv_library More accurate. 
--- configure | 60 +++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/configure b/configure index 301a3e5e3e..7b7fc0938f 100755 --- a/configure +++ b/configure @@ -3038,7 +3038,7 @@ exr_decoder_select="bswapdsp" exr_encoder_deps="zlib" ffv1_decoder_select="rangecoder" ffv1_encoder_select="rangecoder" -ffv1_vulkan_encoder_select="vulkan spirv_compiler" +ffv1_vulkan_encoder_select="vulkan spirv_library" ffvhuff_decoder_select="huffyuv_decoder" ffvhuff_encoder_select="huffyuv_encoder" fic_decoder_select="golomb" @@ -3271,9 +3271,9 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" -dpx_vulkan_hwaccel_deps="vulkan spirv_compiler" +dpx_vulkan_hwaccel_deps="vulkan spirv_library" dpx_vulkan_hwaccel_select="dpx_decoder" -ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" +ffv1_vulkan_hwaccel_deps="vulkan spirv_library" ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" @@ -3351,9 +3351,9 @@ mpeg4_videotoolbox_hwaccel_deps="videotoolbox" mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder" prores_videotoolbox_hwaccel_deps="videotoolbox" prores_videotoolbox_hwaccel_select="prores_decoder" -prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler" +prores_raw_vulkan_hwaccel_deps="vulkan spirv_library" prores_raw_vulkan_hwaccel_select="prores_raw_decoder" -prores_vulkan_hwaccel_deps="vulkan spirv_compiler" +prores_vulkan_hwaccel_deps="vulkan spirv_library" prores_vulkan_hwaccel_select="prores_decoder" vc1_d3d11va_hwaccel_deps="d3d11va" vc1_d3d11va_hwaccel_select="vc1_decoder" @@ -3990,19 +3990,19 @@ aresample_filter_deps="swresample" asr_filter_deps="pocketsphinx" ass_filter_deps="libass" avgblur_opencl_filter_deps="opencl" -avgblur_vulkan_filter_deps="vulkan spirv_compiler" +avgblur_vulkan_filter_deps="vulkan spirv_library" 
azmq_filter_deps="libzmq" -blackdetect_vulkan_filter_deps="vulkan spirv_compiler" +blackdetect_vulkan_filter_deps="vulkan spirv_library" blackframe_filter_deps="gpl" -blend_vulkan_filter_deps="vulkan spirv_compiler" +blend_vulkan_filter_deps="vulkan spirv_library" boxblur_filter_deps="gpl" boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" bwdif_cuda_filter_deps="ffnvcodec" bwdif_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -bwdif_vulkan_filter_deps="vulkan spirv_compiler" -chromaber_vulkan_filter_deps="vulkan spirv_compiler" -color_vulkan_filter_deps="vulkan spirv_compiler" +bwdif_vulkan_filter_deps="vulkan spirv_library" +chromaber_vulkan_filter_deps="vulkan spirv_library" +color_vulkan_filter_deps="vulkan spirv_library" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" convolution_opencl_filter_deps="opencl" @@ -4031,7 +4031,7 @@ elbg_filter_deps="avcodec" eq_filter_deps="gpl" erosion_opencl_filter_deps="opencl" find_rect_filter_deps="avcodec avformat gpl" -flip_vulkan_filter_deps="vulkan spirv_compiler" +flip_vulkan_filter_deps="vulkan spirv_library" flite_filter_deps="libflite threads" framerate_filter_select="scene_sad" freezedetect_filter_select="scene_sad" @@ -4040,15 +4040,15 @@ frei0r_filter_deps="frei0r" frei0r_src_filter_deps="frei0r" fspp_filter_deps="gpl" fsync_filter_deps="avformat" -gblur_vulkan_filter_deps="vulkan spirv_compiler" -hflip_vulkan_filter_deps="vulkan spirv_compiler" +gblur_vulkan_filter_deps="vulkan spirv_library" +hflip_vulkan_filter_deps="vulkan spirv_library" histeq_filter_deps="gpl" hqdn3d_filter_deps="gpl" iccdetect_filter_deps="lcms2" iccgen_filter_deps="lcms2" identity_filter_select="scene_sad" interlace_filter_deps="gpl" -interlace_vulkan_filter_deps="vulkan spirv_compiler" +interlace_vulkan_filter_deps="vulkan spirv_library" kerndeint_filter_deps="gpl" ladspa_filter_deps="ladspa libdl" lcevc_filter_deps="liblcevc_dec" @@ -4065,7 +4065,7 @@ mptestsrc_filter_deps="gpl" 
msad_filter_select="scene_sad" negate_filter_deps="lut_filter" nlmeans_opencl_filter_deps="opencl" -nlmeans_vulkan_filter_deps="vulkan spirv_compiler" +nlmeans_vulkan_filter_deps="vulkan spirv_library" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" @@ -4077,7 +4077,7 @@ overlay_opencl_filter_deps="opencl" overlay_qsv_filter_deps="libmfx" overlay_qsv_filter_select="qsvvpp" overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags" -overlay_vulkan_filter_deps="vulkan spirv_compiler" +overlay_vulkan_filter_deps="vulkan spirv_library" owdenoise_filter_deps="gpl" pad_opencl_filter_deps="opencl" pan_filter_deps="swresample" @@ -4101,7 +4101,7 @@ vpp_amf_filter_deps="amf" scale_qsv_filter_deps="libmfx" scale_qsv_filter_select="qsvvpp" scdet_filter_select="scene_sad" -scdet_vulkan_filter_deps="vulkan spirv_compiler" +scdet_vulkan_filter_deps="vulkan spirv_library" select_filter_select="scene_sad" sharpness_vaapi_filter_deps="vaapi" showcqt_filter_deps="avformat swscale" @@ -4127,11 +4127,11 @@ tonemap_opencl_filter_deps="opencl const_nan" transpose_opencl_filter_deps="opencl" transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate" -transpose_vulkan_filter_deps="vulkan spirv_compiler" +transpose_vulkan_filter_deps="vulkan spirv_library" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" -vflip_vulkan_filter_deps="vulkan spirv_compiler" +vflip_vulkan_filter_deps="vulkan spirv_library" vidstabdetect_filter_deps="libvidstab" vidstabtransform_filter_deps="libvidstab" libvmaf_filter_deps="libvmaf" @@ -4141,11 +4141,11 @@ zoompan_filter_deps="swscale" zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" scale_vt_filter_deps="videotoolbox VTPixelTransferSessionCreate" -scale_vulkan_filter_deps="vulkan spirv_compiler" +scale_vulkan_filter_deps="vulkan spirv_library" vpp_qsv_filter_deps="libmfx" 
vpp_qsv_filter_select="qsvvpp" xfade_opencl_filter_deps="opencl" -xfade_vulkan_filter_deps="vulkan spirv_compiler" +xfade_vulkan_filter_deps="vulkan spirv_library" yadif_cuda_filter_deps="ffnvcodec" yadif_cuda_filter_deps_any="cuda_nvcc cuda_llvm" yadif_videotoolbox_filter_deps="metal corevideo videotoolbox" @@ -4194,11 +4194,11 @@ cws2fws_extralibs="zlib_extralibs" # libraries, in any order avcodec_deps="avutil" -avcodec_suggest="libm stdatomic spirv_compiler" +avcodec_suggest="libm stdatomic spirv_library" avdevice_deps="avformat avcodec avutil" avdevice_suggest="libm stdatomic" avfilter_deps="avutil" -avfilter_suggest="libm stdatomic spirv_compiler" +avfilter_suggest="libm stdatomic spirv_library" avformat_deps="avcodec avutil" avformat_suggest="libm network zlib stdatomic" avutil_suggest="clock_gettime ffnvcodec gcrypt libm libdrm libmfx opencl openssl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt stdatomic" @@ -6784,14 +6784,14 @@ if test -n "$custom_allocator"; then add_extralibs "$custom_allocator_extralibs" fi -# Unlike other feature flags or libraries, spirv_compiler is not defined +# Unlike other feature flags or libraries, spirv_library is not defined # within any of our predefined categories of components. # It gets defined if either libshaderc or libglslang check succeeds. # As such, its in a state of neither being explicitly enabled, nor # explicitly disabled, but even in this state, being mentioned in # _deps results in it always passing. # Disable it explicitly to fix this. 
-disable spirv_compiler +disable spirv_library check_func_headers malloc.h _aligned_malloc && enable aligned_malloc check_func ${malloc_prefix}memalign && enable memalign @@ -7159,10 +7159,10 @@ enabled libharfbuzz && require_pkg_config libharfbuzz harfbuzz hb.h hb_buf if enabled libglslang; then spvremap="-lSPVRemapper" require_headers "glslang/build_info.h" && { test_cpp_condition glslang/build_info.h "GLSLANG_VERSION_MAJOR >= 16" && spvremap="" ; } - check_lib spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \ + check_lib spirv_library glslang/Include/glslang_c_interface.h glslang_initialize_process \ -lglslang -lMachineIndependent -lGenericCodeGen \ ${spvremap} -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lstdc++ $libm_extralibs $pthreads_extralibs || - require spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \ + require spirv_library glslang/Include/glslang_c_interface.h glslang_initialize_process \ -lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen \ ${spvremap} -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lstdc++ $libm_extralibs $pthreads_extralibs ; fi @@ -7254,7 +7254,7 @@ enabled librist && require_pkg_config librist "librist >= 0.2.7" libri enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_new_from_data enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++" -enabled libshaderc && require_pkg_config spirv_compiler "shaderc >= 2019.1" shaderc/shaderc.h shaderc_compiler_initialize +enabled libshaderc && require_pkg_config spirv_library "shaderc >= 2019.1" shaderc/shaderc.h shaderc_compiler_initialize enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer enabled libsmbclient && { check_pkg_config 
libsmbclient smbclient libsmbclient.h smbc_init || require libsmbclient libsmbclient.h smbc_init -lsmbclient; } @@ -7631,7 +7631,7 @@ elif enabled vulkan; then fi if disabled vulkan; then - disable libglslang libshaderc spirv_compiler + disable libglslang libshaderc spirv_library else check_pkg_config_header_only vulkan_1_4 "vulkan >= 1.4.317" "vulkan/vulkan.h" "defined VK_VERSION_1_4" || check_cpp_condition vulkan_1_4 "vulkan/vulkan.h" "defined(VK_VERSION_1_5) || (defined(VK_VERSION_1_4) && VK_HEADER_VERSION >= 317)" -- 2.49.1 >From ecb79d449985042fe6102d3ab6cae4f6b323bb68 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 10:13:12 +0100 Subject: [PATCH 04/12] configure/make: support compile-time SPIR-V generation --- configure | 42 +++++++++++++++++++++++++++++++++++++++--- ffbuild/common.mak | 14 +++++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/configure b/configure index 7b7fc0938f..f61e59b8df 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ External library support: --enable-libfreetype enable libfreetype, needed for drawtext filter [no] --enable-libfribidi enable libfribidi, improves drawtext filter [no] --enable-libharfbuzz enable libharfbuzz, needed for drawtext filter [no] - --enable-libglslang enable GLSL->SPIRV compilation via libglslang [no] + --enable-libglslang enable runtime GLSL->SPIRV compilation via libglslang [no] --enable-libgme enable Game Music Emu via libgme [no] --enable-libgsm enable GSM de/encoding via libgsm [no] --enable-libiec61883 enable iec61883 via libiec61883 [no] @@ -272,7 +272,7 @@ External library support: --enable-librsvg enable SVG rasterization via librsvg [no] --enable-librubberband enable rubberband needed for rubberband filter [no] --enable-librtmp enable RTMP[E] support via librtmp [no] - --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no] + --enable-libshaderc enable runtime GLSL->SPIRV compilation via libshaderc [no] --enable-libshine 
enable fixed-point MP3 encoding via libshine [no] --enable-libsmbclient enable Samba protocol via libsmbclient [no] --enable-libsnappy enable Snappy compression, needed for hap encoding [no] @@ -407,6 +407,7 @@ Toolchain options: --dep-cc=DEPCC use dependency generator DEPCC [$cc_default] --nvcc=NVCC use Nvidia CUDA compiler NVCC or clang [$nvcc_default] --ld=LD use linker LD [$ld_default] + --glslc=GLSLC use GLSL compiler GLSLC [$glslc_default] --metalcc=METALCC use metal compiler METALCC [$metalcc_default] --metallib=METALLIB use metal linker METALLIB [$metallib_default] --pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default] @@ -429,6 +430,7 @@ Toolchain options: --extra-libs=ELIBS add ELIBS [$ELIBS] --extra-version=STRING version string suffix [] --optflags=OPTFLAGS override optimization-related compiler flags + --glslcflags=GLSLCFLAGS override glslc flags [$glslcflags_default] --nvccflags=NVCCFLAGS override nvcc flags [$nvccflags_default] --build-suffix=SUFFIX library name suffix [] --enable-pic build position-independent code @@ -1072,6 +1074,10 @@ hostcc_o(){ eval printf '%s\\n' $HOSTCC_O } +glslc_o(){ + eval printf '%s\\n' $GLSLC_O +} + nvcc_o(){ eval printf '%s\\n' $NVCC_O } @@ -1097,6 +1103,25 @@ test_objcc(){ test_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM } +test_glslc(){ + log test_glslc "$@" + cat > $TMPGLSL + log_file $TMPGLSL + test_cmd $glslc $glslcflags "$@" $(glslc_o $TMPO) $TMPGLSL +} + +check_glslc(){ + log check_glslc "$@" + name=$1 + shift 1 + disabled $name && return + disable $name + test_glslc "$@" <<EOF && enable $name +#version 460 +void main(void) {} +EOF +} + test_nvcc(){ log test_nvcc "$@" cat > $TMPCU @@ -2777,6 +2802,8 @@ CMDLINE_SET=" ln_s logfile malloc_prefix + glslc + glslcflags metalcc metallib nm @@ -4254,6 +4281,8 @@ host_cc_default="gcc" doxygen_default="doxygen" install="install" ln_s_default="ln -s -f" +glslc_default="glslang" 
+glslcflags_default="-V100 --target-env spirv1.6 -R -Os" metalcc_default="xcrun -sdk macosx metal" metallib_default="xcrun -sdk macosx metallib" nm_default="nm -g" @@ -4356,6 +4385,7 @@ HOSTCC_C='-c' HOSTCC_E='-E -o $@' HOSTCC_O='-o $@' HOSTLD_O='-o $@' +GLSLC_O='-o $@' NVCC_C='-c' NVCC_O='-o $@' @@ -4888,7 +4918,7 @@ if enabled cuda_nvcc; then fi set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \ - target_exec x86asmexe metalcc metallib stdc stdcxx + target_exec x86asmexe glslc glslcflags metalcc metallib stdc stdcxx enabled cross_compile || host_cc_default=$cc set_default host_cc @@ -4960,6 +4990,7 @@ tmpfile TMPE $EXESUF tmpfile TMPH .h tmpfile TMPM .m tmpfile TMPCU .cu +tmpfile TMPGLSL .comp.glsl tmpfile TMPO .o tmpfile TMPS .S tmpfile TMPSH .sh @@ -7630,6 +7661,8 @@ elif enabled vulkan; then check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 277)" fi +enabled vulkan && check_glslc vulkan + if disabled vulkan; then disable libglslang libshaderc spirv_library else @@ -8383,6 +8416,7 @@ RESPONSE_FILES=$response_files AR_O=$ar_o AR_CMD=$ar NM_CMD=$nm +GLSLC=$glslc METALCC=$metalcc METALLIB=$metallib RANLIB=$ranlib @@ -8396,6 +8430,7 @@ CFLAGS=$CFLAGS CXXFLAGS=$CXXFLAGS OBJCFLAGS=$OBJCFLAGS ASFLAGS=$ASFLAGS +GLSLCFLAGS=$glslcflags NVCCFLAGS=$nvccflags AS_C=$AS_C AS_O=$AS_O @@ -8407,6 +8442,7 @@ CC_E=$CC_E CC_O=$CC_O CXX_C=$CXX_C CXX_O=$CXX_O +GLSLC_O=$GLSLC_O NVCC_C=$NVCC_C NVCC_O=$NVCC_O LD_O=$LD_O diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 89c0c413e1..6a37a606e4 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -27,7 +27,7 @@ BIN2C = $(BIN2CEXE) ifndef V Q = @ ECHO = printf "$(1)\t%s\n" $(2) -BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS X86ASM AR LD LDXX STRIP CP WINDRES NVCC BIN2C METALCC METALLIB +BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS X86ASM AR LD LDXX STRIP CP WINDRES GLSLC NVCC BIN2C METALCC METALLIB SILENT = DEPCC DEPCXX DEPHOSTCC DEPAS DEPX86ASM 
RANLIB RM MSG = $@ @@ -68,6 +68,7 @@ COMPILE_S = $(call COMPILE,AS) COMPILE_M = $(call COMPILE,OBJCC) COMPILE_X86ASM = $(call COMPILE,X86ASM) COMPILE_HOSTC = $(call COMPILE,HOSTCC) +COMPILE_GLSLC = $(call COMPILE,GLSLC) COMPILE_NVCC = $(call COMPILE,NVCC) COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS) COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) @@ -130,6 +131,12 @@ RUN_MINIFY = $(M)sed 's!/\\*.*\\*/!!g' $< | tr '\n' ' ' | tr -s ' ' | sed 's/^ / %.gz: TAG = GZIP %.min: TAG = MINIFY +%.spv: %.glsl + $(COMPILE_GLSLC) + +%.spv.c: %.spv $(BIN2CEXE) + $(RUN_BIN2C) + %.metal.air: %.metal $(METALCC) $< -o $@ @@ -228,10 +235,11 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR) SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-) SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%) HOBJS = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o)) +SPVOBJS = $(filter %.spv.o,$(OBJS)) PTXOBJS = $(filter %.ptx.o,$(OBJS)) $(HOBJS): CCFLAGS += $(CFLAGS_HEADERS) checkheaders: $(HOBJS) -.SECONDARY: $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=.gz) $(PTXOBJS:.o=) +.SECONDARY: $(HOBJS:.o=.c) $(SPVOBJS:.o=.c) $(SPVOBJS:.o=) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=.gz) $(PTXOBJS:.o=) alltools: $(TOOLS) $(HOSTOBJS): %.o: %.c @@ -250,7 +258,7 @@ $(TOOLOBJS): | tools OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS)) -CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb +CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.c *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a define RULES -- 2.49.1 >From 2dff147c646352ef3cc5b2e337813197ceb9cb03 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 
04:26:36 +0100 Subject: [PATCH 05/12] vulkan: add support for precompiled shaders --- libavutil/vulkan.c | 69 +++++++++++++++++++++++++++++++++++++--------- libavutil/vulkan.h | 17 ++++++++++-- 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 85501c5bbd..b3f15daf8c 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -2078,19 +2078,14 @@ void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); } -int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, - VkPipelineStageFlags stage, - const char *extensions[], int nb_extensions, - int lg_x, int lg_y, int lg_z, - uint32_t required_subgroup_size) +int ff_vk_shader_load(FFVulkanShader *shd, + VkPipelineStageFlags stage, VkSpecializationInfo *spec, + uint32_t wg_size[3], uint32_t required_subgroup_size) { - av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); - - shd->name = name; shd->stage = stage; - shd->lg_size[0] = lg_x; - shd->lg_size[1] = lg_y; - shd->lg_size[2] = lg_z; + shd->precompiled = 1; + shd->specialization_info = spec; + memcpy(shd->lg_size, wg_size, 3*sizeof(uint32_t)); switch (shd->stage) { case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: @@ -2109,6 +2104,22 @@ int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, break; }; + return 0; +} + +int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, + VkPipelineStageFlags stage, + const char *extensions[], int nb_extensions, + int lg_x, int lg_y, int lg_z, + uint32_t required_subgroup_size) +{ + ff_vk_shader_load(shd, stage, NULL, + (uint32_t []) { lg_x, lg_y, lg_z }, required_subgroup_size); + + shd->name = name; + shd->precompiled = 0; + av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); + if (required_subgroup_size) { shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; 
shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; @@ -2254,6 +2265,7 @@ static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0, .stage = shd->stage, .module = mod, + .pSpecializationInfo = shd->specialization_info, }, }; @@ -2291,7 +2303,7 @@ static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd, .setLayoutCount = shd->nb_descriptor_sets, .pushConstantRangeCount = shd->push_consts_num, .pPushConstantRanges = shd->push_consts, - .pSpecializationInfo = NULL, + .pSpecializationInfo = shd->specialization_info, }; ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create, @@ -2371,11 +2383,39 @@ static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd) } int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, - uint8_t *spirv, size_t spirv_len, + const char *spirv, size_t spirv_len, const char *entrypoint) { int err; FFVulkanFunctions *vk = &s->vkfn; + VkSpecializationMapEntry spec_entries[3]; + VkSpecializationInfo spec_info; + + if (shd->precompiled) { + if (!shd->specialization_info) { + spec_info = (VkSpecializationInfo) { + .pMapEntries = spec_entries, + .mapEntryCount = 0, + .pData = shd->lg_size, + .dataSize = 0, + }; + shd->specialization_info = &spec_info; + } + + VkSpecializationMapEntry *spe = (void *)shd->specialization_info->pMapEntries; + for (int i = 0; i < 3; i++) { + spe[shd->specialization_info->mapEntryCount++] = (VkSpecializationMapEntry) { + .constantID = i, + .offset = shd->specialization_info->dataSize + i*sizeof(uint32_t), + .size = sizeof(uint32_t), + }; + } + + uint8_t *spd = (uint8_t *)shd->specialization_info->pData; + memcpy(&spd[shd->specialization_info->dataSize], + shd->lg_size, 3*sizeof(uint32_t)); + shd->specialization_info->dataSize += 3*sizeof(uint32_t); + } err = init_descriptors(s, shd); if (err < 0) @@ -2488,6 +2528,9 @@ int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, 
FFVulkanShader *shd, set->singular = singular; set->nb_bindings = nb; + if (shd->precompiled) + return 0; + print: /* Write shader info */ for (int i = 0; i < nb; i++) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 7c513fb8e2..a73517fa58 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -194,11 +194,15 @@ typedef struct FFVulkanShader { /* Name for id/debugging purposes */ const char *name; + /* Whether shader is precompiled or not */ + int precompiled; + VkSpecializationInfo *specialization_info; + /* Shader text */ AVBPrint src; /* Compute shader local group sizes */ - int lg_size[3]; + uint32_t lg_size[3]; /* Shader bind point/type */ VkPipelineStageFlags stage; @@ -608,6 +612,15 @@ int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size); +/** + * Initialize a shader object. + * The workgroup size must have local_size_XYZ_id of 0, 1, 2. + * If VkSpecializationInfo exists, it must have at least 3x4 byte entry left. + */ +int ff_vk_shader_load(FFVulkanShader *shd, + VkPipelineStageFlags stage, VkSpecializationInfo *spec, + uint32_t wg_size[3], uint32_t required_subgroup_size); + /** * Output the shader code as logging data, with a specific * priority. @@ -618,7 +631,7 @@ void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio); * Link a shader into an executable. */ int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, - uint8_t *spirv, size_t spirv_len, + const char *spirv, size_t spirv_len, const char *entrypoint); /** -- 2.49.1 >From a57024c4e84fae3ffe21f3e7680ffe4c06331185 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 06:55:50 +0100 Subject: [PATCH 06/12] configure: rename PTX_COMPRESSION to SHADER_COMPRESSION It's useful for GLSL and Metal as well. 
--- configure | 8 ++++---- ffbuild/common.mak | 2 +- libavfilter/cuda/load_helper.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/configure b/configure index f61e59b8df..e2c4047a8c 100755 --- a/configure +++ b/configure @@ -531,7 +531,7 @@ Developer options (useful when working on FFmpeg itself): --enable-linux-perf enable Linux Performance Monitor API --enable-macos-kperf enable macOS kperf (private) API --disable-large-tests disable tests that use a large amount of memory - --disable-ptx-compression don't compress CUDA PTX code even when possible + --disable-shader-compression don't compress shader code even when possible --disable-resource-compression don't compress resources even when possible --disable-version-tracking don't include the git/release version in the build @@ -2192,7 +2192,7 @@ CONFIG_LIST=" neon_clobber_test ossfuzz pic - ptx_compression + shader_compression resource_compression thumb valgrind_backtrace @@ -4328,7 +4328,7 @@ enable faan faandct faanidct enable iamf enable large_tests enable optimizations -enable ptx_compression +enable shader_compression enable resource_compression enable runtime_cpudetect enable safe_bitstream_reader @@ -7120,7 +7120,7 @@ EOF [ -x "$(command -v gzip)" ] && enable gzip -enabled zlib_gzip && enabled gzip || disable ptx_compression +enabled zlib_gzip && enabled gzip || disable shader_compression enabled zlib_gzip && enabled gzip || disable resource_compression diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 6a37a606e4..c9918a9101 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -149,7 +149,7 @@ RUN_MINIFY = $(M)sed 's!/\\*.*\\*/!!g' $< | tr '\n' ' ' | tr -s ' ' | sed 's/^ / %.ptx: %.cu $(SRC_PATH)/compat/cuda/cuda_runtime.h $(COMPILE_NVCC) -ifdef CONFIG_PTX_COMPRESSION +ifdef CONFIG_SHADER_COMPRESSION %.ptx.gz: %.ptx $(RUN_GZIP) diff --git a/libavfilter/cuda/load_helper.c b/libavfilter/cuda/load_helper.c index b049ec7130..115523d642 100644 --- 
a/libavfilter/cuda/load_helper.c +++ b/libavfilter/cuda/load_helper.c @@ -23,7 +23,7 @@ #include "libavutil/cuda_check.h" #include "libavutil/mem.h" -#if CONFIG_PTX_COMPRESSION +#if CONFIG_SHADER_COMPRESSION #include <zlib.h> #define CHUNK_SIZE 1024 * 64 #endif @@ -37,7 +37,7 @@ int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_mo { CudaFunctions *cu = hwctx->internal->cuda_dl; -#if CONFIG_PTX_COMPRESSION +#if CONFIG_SHADER_COMPRESSION z_stream stream = { 0 }; uint8_t *buf, *tmp; uint64_t buf_size; -- 2.49.1 >From da75cbc0bc96e9d4641a6d849aa2a28d477754ba Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 10:37:12 +0100 Subject: [PATCH 07/12] cuda/load_helper: move zlib decompression into a separate file Allows it to be reused for Vulkan --- libavfilter/cuda/load_helper.c | 60 ++++-------------------- libavutil/Makefile | 1 + libavutil/zlib_utils.h | 83 ++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 52 deletions(-) create mode 100644 libavutil/zlib_utils.h diff --git a/libavfilter/cuda/load_helper.c b/libavfilter/cuda/load_helper.c index 115523d642..ae8adcfeb7 100644 --- a/libavfilter/cuda/load_helper.c +++ b/libavfilter/cuda/load_helper.c @@ -24,8 +24,7 @@ #include "libavutil/mem.h" #if CONFIG_SHADER_COMPRESSION -#include <zlib.h> -#define CHUNK_SIZE 1024 * 64 +#include "libavutil/zlib_utils.h" #endif #include "load_helper.h" @@ -38,58 +37,15 @@ int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_mo CudaFunctions *cu = hwctx->internal->cuda_dl; #if CONFIG_SHADER_COMPRESSION - z_stream stream = { 0 }; - uint8_t *buf, *tmp; - uint64_t buf_size; - int ret; - - if (inflateInit2(&stream, 32 + 15) != Z_OK) { - av_log(avctx, AV_LOG_ERROR, "Error during zlib initialisation: %s\n", stream.msg); - return AVERROR(ENOSYS); - } - - buf_size = CHUNK_SIZE * 4; - buf = av_realloc(NULL, buf_size); - if (!buf) { - inflateEnd(&stream); - return AVERROR(ENOMEM); - } - - 
stream.next_in = data; - stream.avail_in = length; - - do { - stream.avail_out = buf_size - stream.total_out; - stream.next_out = buf + stream.total_out; - - ret = inflate(&stream, Z_FINISH); - if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) { - av_log(avctx, AV_LOG_ERROR, "zlib inflate error(%d): %s\n", ret, stream.msg); - inflateEnd(&stream); - av_free(buf); - return AVERROR(EINVAL); - } - - if (stream.avail_out == 0) { - buf_size += CHUNK_SIZE; - tmp = av_realloc(buf, buf_size); - if (!tmp) { - inflateEnd(&stream); - av_free(buf); - return AVERROR(ENOMEM); - } - buf = tmp; - } - } while (ret != Z_STREAM_END); - - // NULL-terminate string - // there is guaranteed to be space for this, due to condition in loop - buf[stream.total_out] = 0; - - inflateEnd(&stream); + uint8_t *out; + size_t out_len; + int ret = ff_zlib_expand(avctx, &out, &out_len, + data, length); + if (ret < 0) + return ret; - ret = CHECK_CU(cu->cuModuleLoadData(cu_module, buf)); - av_free(buf); + ret = CHECK_CU(cu->cuModuleLoadData(cu_module, out)); + av_free(out); return ret; #else return CHECK_CU(cu->cuModuleLoadData(cu_module, data)); diff --git a/libavutil/Makefile b/libavutil/Makefile index ee77e51c08..2e0e12c08b 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -234,6 +234,7 @@ STLIBOBJS-$(CONFIG_SWSCALE) += half2float.o SHLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o SKIPHEADERS += objc.h +SKIPHEADERS-$(CONFIG_ZLIB) += zlib_utils.h SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h \ cuda_check.h diff --git a/libavutil/zlib_utils.h b/libavutil/zlib_utils.h new file mode 100644 index 0000000000..a00d5d85ed --- /dev/null +++ b/libavutil/zlib_utils.h @@ -0,0 +1,83 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ZLIB_UTILS_H +#define AVUTIL_ZLIB_UTILS_H + +#include <zlib.h> +#define CHUNK_SIZE 1024 * 64 + +static int ff_zlib_expand(void *ctx, uint8_t **out, size_t *out_len, + const uint8_t *src, int src_len) +{ + int ret; + + z_stream stream = { 0 }; + if (inflateInit2(&stream, 32 + 15) != Z_OK) { + av_log(ctx, AV_LOG_ERROR, "Error during zlib initialisation: %s\n", + stream.msg); + return AVERROR(ENOSYS); + } + + uint64_t buf_size = CHUNK_SIZE * 4; + uint8_t *buf = av_realloc(NULL, buf_size); + if (!buf) { + inflateEnd(&stream); + return AVERROR(ENOMEM); + } + + stream.next_in = src; + stream.avail_in = src_len; + + do { + stream.avail_out = buf_size - stream.total_out; + stream.next_out = buf + stream.total_out; + + ret = inflate(&stream, Z_FINISH); + if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) { + av_log(ctx, AV_LOG_ERROR, "zlib inflate error(%d): %s\n", + ret, stream.msg); + inflateEnd(&stream); + av_free(buf); + return AVERROR(EINVAL); + } + + if (stream.avail_out == 0) { + buf_size += CHUNK_SIZE; + uint8_t *tmp = av_realloc(buf, buf_size); + if (!tmp) { + inflateEnd(&stream); + av_free(buf); + return AVERROR(ENOMEM); + } + buf = tmp; + } + } while (ret != Z_STREAM_END); + + // NULL-terminate string + // there is guaranteed to be space for this, due to condition in loop + buf[stream.total_out] = 0; + + inflateEnd(&stream); + + *out = buf; + *out_len = stream.total_out; + + return 0; +} +#endif /* AVUTIL_ZLIB_UTILS_H */ -- 2.49.1 >From 
46302e2a1d6bd1e8200992a6473a687e47d3d369 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 10:37:52 +0100 Subject: [PATCH 08/12] configure: allow shader compression for Vulkan 14KiB -> 4KiB bwdif shader. Saves binary space. --- ffbuild/common.mak | 8 ++++++++ libavutil/vulkan.c | 29 ++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/ffbuild/common.mak b/ffbuild/common.mak index c9918a9101..5a46f08b90 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -134,8 +134,16 @@ RUN_MINIFY = $(M)sed 's!/\\*.*\\*/!!g' $< | tr '\n' ' ' | tr -s ' ' | sed 's/^ / %.spv: %.glsl $(COMPILE_GLSLC) +ifdef CONFIG_SHADER_COMPRESSION +%.spv.gz: %.spv + $(RUN_GZIP) + +%.spv.c: %.spv.gz $(BIN2CEXE) + $(RUN_BIN2C) +else %.spv.c: %.spv $(BIN2CEXE) $(RUN_BIN2C) +endif %.metal.air: %.metal $(METALCC) $< -o $@ diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index b3f15daf8c..bb0f2c9670 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -18,12 +18,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" #include "avassert.h" #include "mem.h" #include "vulkan.h" #include "libavutil/vulkan_loader.h" +#if CONFIG_SHADER_COMPRESSION +#include "libavutil/zlib_utils.h" +#endif + const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -2415,15 +2420,25 @@ int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, memcpy(&spd[shd->specialization_info->dataSize], shd->lg_size, 3*sizeof(uint32_t)); shd->specialization_info->dataSize += 3*sizeof(uint32_t); + +#if CONFIG_SHADER_COMPRESSION + uint8_t *out; + size_t out_len; + int ret = ff_zlib_expand(s, &out, &out_len, spirv, spirv_len); + if (ret < 0) + return ret; + spirv = out; + spirv_len = out_len; +#endif } err = init_descriptors(s, shd); if (err < 0) - return err; + goto end; err = init_pipeline_layout(s, shd); if (err < 0) - return err; 
+ goto end; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { for (int i = 0; i < shd->nb_descriptor_sets; i++) @@ -2432,13 +2447,11 @@ int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { err = create_shader_object(s, shd, spirv, spirv_len, entrypoint); - if (err < 0) - return err; } else { VkShaderModule mod; err = create_shader_module(s, shd, &mod, spirv, spirv_len); if (err < 0) - return err; + goto end; switch (shd->bind_point) { case VK_PIPELINE_BIND_POINT_COMPUTE: @@ -2456,6 +2469,12 @@ int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, - return err; + goto end; } +end: + +#if CONFIG_SHADER_COMPRESSION + if (shd->precompiled) + av_free((void *)spirv); +#endif - return 0; + return err; } -- 2.49.1 >From a96daa85c5092527216e312fc145996fdd93eec5 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 10:00:10 +0100 Subject: [PATCH 09/12] bwdif_vulkan: use compile-time SPIR-V --- configure | 2 +- libavfilter/vf_bwdif_vulkan.c | 97 ++----------------- libavfilter/vulkan/Makefile | 2 +- .../vulkan/{bwdif.comp => bwdif.comp.glsl} | 47 ++++++++- 4 files changed, 54 insertions(+), 94 deletions(-) rename libavfilter/vulkan/{bwdif.comp => bwdif.comp.glsl} (76%) diff --git a/configure b/configure index e2c4047a8c..c4a66c9a12 100755 --- a/configure +++ b/configure @@ -4027,7 +4027,7 @@ boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" bwdif_cuda_filter_deps="ffnvcodec" bwdif_cuda_filter_deps_any="cuda_nvcc cuda_llvm" -bwdif_vulkan_filter_deps="vulkan spirv_library" +bwdif_vulkan_filter_deps="vulkan" chromaber_vulkan_filter_deps="vulkan spirv_library" color_vulkan_filter_deps="vulkan spirv_library" colorkey_opencl_filter_deps="opencl" diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c index 549e814886..c6fb6abe9c 100644 --- a/libavfilter/vf_bwdif_vulkan.c +++ b/libavfilter/vf_bwdif_vulkan.c @@ -22,7 +22,6 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" 
-#include "libavutil/vulkan_spirv.h" #include "vulkan_filter.h" #include "yadif.h" #include "filters.h" @@ -43,27 +42,17 @@ typedef struct BWDIFParameters { int current_field; } BWDIFParameters; -extern const char *ff_source_bwdif_comp; +extern const unsigned char ff_bwdif_comp_spv_data[]; +extern const unsigned int ff_bwdif_comp_spv_len; static av_cold int init_filter(AVFilterContext *ctx) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; BWDIFVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; FFVulkanDescriptorSetBinding *desc; - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); if (!s->qf) { av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); @@ -73,119 +62,49 @@ static av_cold int init_filter(AVFilterContext *ctx) RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "bwdif", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 1, 64, 1, - 0)); - shd = &s->shd; + ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t [3]) { 1, 64, planes }, 0); desc = (FFVulkanDescriptorSetBinding []) { { .name = "prev", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, { .name = "cur", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, { .name = "next", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = 
ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 4, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, int parity; ); - GLSLC(1, int tff; ); - GLSLC(1, int current_field; ); - GLSLC(0, }; ); + ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 4, 0, 0); ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BWDIFParameters), VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_bwdif_comp ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; ); - GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END); - GLSLC(1, bool field_parity = (parity ^ tff) != 0; ); - GLSLC(0, ); - GLSLC(1, size = imageSize(dst[0]); ); - GLSLC(1, if (!IS_WITHIN(pos, size)) { ); - GLSLC(2, return; ); - GLSLC(1, } else if (is_intra) { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); - GLSLC(3, return; ); - } - GLSLF(2, process_plane_intra(%i, pos); ,i); - } - GLSLC(1, } else if (filter_field) { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); - GLSLC(3, return; ); - } - GLSLF(2, process_plane(%i, pos, filter_field, is_intra, field_parity); ,i); - } - GLSLC(1, } else { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); - 
GLSLC(3, return; ); - } - GLSLF(2, imageStore(dst[%i], pos, imageLoad(cur[%i], pos)); ,i, i); - } - GLSLC(1, } ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - + RET(ff_vk_shader_link(vkctx, &s->shd, + ff_bwdif_comp_spv_data, + ff_bwdif_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); s->initialized = 1; fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - return err; } diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile index c77aaf4f6b..4987ba3a8c 100644 --- a/libavfilter/vulkan/Makefile +++ b/libavfilter/vulkan/Makefile @@ -3,7 +3,7 @@ GEN_CLEANSUFFIXES = *.o *.c *.d clean:: $(RM) $(GEN_CLEANSUFFIXES:%=libavfilter/vulkan/%) -OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.o +OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.o VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp)) diff --git a/libavfilter/vulkan/bwdif.comp b/libavfilter/vulkan/bwdif.comp.glsl similarity index 76% rename from libavfilter/vulkan/bwdif.comp rename to libavfilter/vulkan/bwdif.comp.glsl index 5152464823..fc190eaf82 100644 --- a/libavfilter/vulkan/bwdif.comp +++ b/libavfilter/vulkan/bwdif.comp.glsl @@ -18,6 +18,24 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +layout(set = 0, binding = 0) uniform readonly image2D prev[]; +layout(set = 0, binding = 1) uniform readonly image2D cur[]; +layout(set = 0, binding = 2) uniform readonly image2D next[]; +layout(set = 0, binding = 3) uniform 
writeonly image2D dst[]; + +layout(push_constant, scalar) uniform pushConstants { + int parity; + int tff; + int current_field; +}; + const vec4 coef_lf[2] = { vec4(4309), vec4(213), }; const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) }; const vec4 coef_sp[2] = { vec4(5077), vec4(981), }; @@ -27,8 +45,10 @@ vec4 process_intra(vec4 cur[4]) return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); } -void process_plane_intra(int idx, ivec2 pos) +void process_plane_intra(ivec2 pos) { + const uint idx = gl_LocalInvocationID.z; + vec4 dcur[4]; dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3)); dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1)); @@ -72,9 +92,11 @@ vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 return mix(interpol, fd, diff_mask); } -void process_plane(int idx, const ivec2 pos, bool filter_field, - bool is_intra, bool field_parity) +void process_plane(const ivec2 pos) { + const uint idx = gl_LocalInvocationID.z; + bool field_parity = (parity ^ tff) != 0; + vec4 dcur[4]; vec4 prev1[2]; vec4 next1[2]; @@ -120,3 +142,22 @@ void process_plane(int idx, const ivec2 pos, bool filter_field, imageStore(dst[idx], pos, process_line(prev2, prev1, dcur, next1, next2)); } + +void main() +{ + const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + bool filter_field = ((pos.y ^ parity) & 1) == 1; + bool is_intra = filter_field && (current_field == 0); + +#define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) + if (!IS_WITHIN(pos, imageSize(dst[gl_LocalInvocationID.z]))) { + return; + } else if (is_intra) { + process_plane_intra(pos); + } else if (filter_field) { + process_plane(pos); + } else { + imageStore(dst[gl_LocalInvocationID.z], pos, + imageLoad(cur[gl_LocalInvocationID.z], pos)); + } +} -- 2.49.1 >From e105b0d2d1acf811b4bbe68fe6c6e173e0e930ae Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 15:21:36 +0100 Subject: [PATCH 10/12] avgblur_vulkan: generate SPIR-V during 
compilation --- configure | 2 +- libavfilter/vf_avgblur_vulkan.c | 114 +++++++-------------------- libavfilter/vulkan/Makefile | 1 + libavfilter/vulkan/avgblur.comp.glsl | 58 ++++++++++++++ 4 files changed, 87 insertions(+), 88 deletions(-) create mode 100644 libavfilter/vulkan/avgblur.comp.glsl diff --git a/configure b/configure index c4a66c9a12..1ad5278f6a 100755 --- a/configure +++ b/configure @@ -4017,7 +4017,7 @@ aresample_filter_deps="swresample" asr_filter_deps="pocketsphinx" ass_filter_deps="libass" avgblur_opencl_filter_deps="opencl" -avgblur_vulkan_filter_deps="vulkan spirv_library" +avgblur_vulkan_filter_deps="vulkan" azmq_filter_deps="libzmq" blackdetect_vulkan_filter_deps="vulkan spirv_library" blackframe_filter_deps="gpl" diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c index 156278dd78..847390d064 100644 --- a/libavfilter/vf_avgblur_vulkan.c +++ b/libavfilter/vf_avgblur_vulkan.c @@ -19,13 +19,15 @@ */ #include "libavutil/random_seed.h" -#include "libavutil/vulkan_spirv.h" #include "libavutil/opt.h" #include "vulkan_filter.h" #include "filters.h" #include "video.h" +extern const unsigned char ff_avgblur_comp_spv_data[]; +extern const unsigned int ff_avgblur_comp_spv_len; + typedef struct AvgBlurVulkanContext { FFVulkanContext vkctx; @@ -38,43 +40,21 @@ typedef struct AvgBlurVulkanContext { struct { float filter_norm[4]; int32_t filter_len[2]; + uint32_t planes; } opts; int size_x; int size_y; - int planes; } AvgBlurVulkanContext; -static const char blur_kernel[] = { - C(0, void distort(const ivec2 pos, const int idx) ) - C(0, { ) - C(1, vec4 sum = vec4(0); ) - C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) ) - C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) ) - C(2, sum += imageLoad(input_img[idx], pos + ivec2(x, y)); ) - C(0, ) - C(1, imageStore(output_img[idx], pos, sum * filter_norm); ) - C(0, } ) -}; - static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - uint8_t *spv_data; 
- size_t spv_len; - void *spv_opaque = NULL; AvgBlurVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } + FFVulkanDescriptorSetBinding *desc; s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); if (!s->qf) { @@ -84,68 +64,33 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "avgblur", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 1, 1, - 0)); - shd = &s->shd; - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec4 filter_norm; ); - GLSLC(1, ivec2 filter_len; ); - GLSLC(0, }; ); - GLSLC(0, ); + ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, + NULL, (uint32_t []) { 32, 1, planes }, 0); ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, vec4 res; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - 
for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - if (s->planes & (1 << i)) { - GLSLF(1, distort(pos, %i); ,i); - } else { - GLSLF(1, res = imageLoad(input_img[%i], pos); ,i); - GLSLF(1, imageStore(output_img[%i], pos, res); ,i); - } - } - GLSLC(0, } ); + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = planes, + }, + }; - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0); + + RET(ff_vk_shader_link(vkctx, &s->shd, + ff_avgblur_comp_spv_data, + ff_avgblur_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); @@ -160,11 +105,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) s->opts.filter_norm[3] = s->opts.filter_norm[0]; fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - return err; } @@ -221,7 +161,7 @@ static void avgblur_vulkan_uninit(AVFilterContext *avctx) static const AVOption avgblur_vulkan_options[] = { { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, - { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, + { "planes", "Set planes to filter (bitmask)", OFFSET(opts.planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, { NULL }, }; diff --git a/libavfilter/vulkan/Makefile 
b/libavfilter/vulkan/Makefile index 4987ba3a8c..50f821fd53 100644 --- a/libavfilter/vulkan/Makefile +++ b/libavfilter/vulkan/Makefile @@ -3,6 +3,7 @@ GEN_CLEANSUFFIXES = *.o *.c *.d clean:: $(RM) $(GEN_CLEANSUFFIXES:%=libavfilter/vulkan/%) +OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.o diff --git a/libavfilter/vulkan/avgblur.comp.glsl b/libavfilter/vulkan/avgblur.comp.glsl new file mode 100644 index 0000000000..6ac401867a --- /dev/null +++ b/libavfilter/vulkan/avgblur.comp.glsl @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#version 460 +#extension GL_EXT_shader_image_load_formatted : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_nonuniform_qualifier : require + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +layout(set = 0, binding = 0) uniform readonly image2D input_img[]; +layout(set = 0, binding = 1) uniform writeonly image2D output_img[]; + +layout(push_constant, scalar) uniform pushConstants { + vec4 filter_norm; + ivec2 filter_len; + uint planes; +}; + +void main() +{ + const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + +#define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) + ivec2 size = imageSize(output_img[gl_LocalInvocationID.z]); + if (!IS_WITHIN(pos, size)) + return; + + if ((planes & (1 << gl_LocalInvocationID.z)) == 0) { + imageStore(output_img[gl_LocalInvocationID.z], pos, + imageLoad(input_img[gl_LocalInvocationID.z], pos)); + return; + } + + vec4 sum = vec4(0); + for (int y = -filter_len.y; y <= filter_len.y; y++) + for (int x = -filter_len.x; x <= filter_len.x; x++) + sum += imageLoad(input_img[gl_LocalInvocationID.z], pos + ivec2(x, y)); + + imageStore(output_img[gl_LocalInvocationID.z], pos, sum * filter_norm); +} -- 2.49.1 >From 2bee285888cd6f1bffe665375a8e7dbc7bad2004 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 13:12:44 +0100 Subject: [PATCH 11/12] vulkan_prores_raw: use compile-time SPIR-V generation --- configure | 2 +- libavcodec/vulkan/Makefile | 5 +- libavcodec/vulkan/common.comp | 11 ++ libavcodec/vulkan/dct.comp | 14 +- ...ecode.comp => prores_raw_decode.comp.glsl} | 24 +++ ...aw_idct.comp => prores_raw_idct.comp.glsl} | 27 ++++ libavcodec/vulkan_prores_raw.c | 139 ++++++------------ 7 files changed, 124 insertions(+), 98 
deletions(-) rename libavcodec/vulkan/{prores_raw_decode.comp => prores_raw_decode.comp.glsl} (93%) rename libavcodec/vulkan/{prores_raw_idct.comp => prores_raw_idct.comp.glsl} (85%) diff --git a/configure b/configure index 1ad5278f6a..886ab7ba1e 100755 --- a/configure +++ b/configure @@ -3378,7 +3378,7 @@ mpeg4_videotoolbox_hwaccel_deps="videotoolbox" mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder" prores_videotoolbox_hwaccel_deps="videotoolbox" prores_videotoolbox_hwaccel_select="prores_decoder" -prores_raw_vulkan_hwaccel_deps="vulkan spirv_library" +prores_raw_vulkan_hwaccel_deps="vulkan" prores_raw_vulkan_hwaccel_select="prores_raw_decoder" prores_vulkan_hwaccel_deps="vulkan spirv_library" prores_vulkan_hwaccel_select="prores_decoder" diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 35e96c506d..c1f22cb8cc 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -14,9 +14,8 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/ffv1_common.o vulkan/ffv1_reset.o \ vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o -OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o vulkan/dct.o \ - vulkan/prores_raw_decode.o \ - vulkan/prores_raw_idct.o +OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \ + vulkan/prores_raw_idct.comp.spv.o OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o vulkan/dct.o \ vulkan/prores_vld.o \ diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp index 3538aeacd1..19619e2d06 100644 --- a/libavcodec/vulkan/common.comp +++ b/libavcodec/vulkan/common.comp @@ -18,6 +18,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef VULKAN_COMMON_H +#define VULKAN_COMMON_H + +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require +#extension GL_EXT_expect_assume : enable +#extension 
GL_EXT_control_flow_attributes : enable + layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { uint8_t v; }; @@ -336,3 +345,5 @@ int left_bits(in GetBitContext gb) { return int(gb.buf_end - gb.buf) * 8 + gb.bits_valid; } + +#endif /* VULKAN_COMMON_H */ diff --git a/libavcodec/vulkan/dct.comp b/libavcodec/vulkan/dct.comp index 34c6ad128f..3cae84877f 100644 --- a/libavcodec/vulkan/dct.comp +++ b/libavcodec/vulkan/dct.comp @@ -31,12 +31,22 @@ * IEEE Transactions on Communications, Vol. 25, No. 9, pp 1004-1009, Sept. 1977 */ +#ifndef VULKAN_DCT_H +#define VULKAN_DCT_H + +#ifndef NB_BLOCKS +#define NB_BLOCKS 1 +#endif + #ifndef NB_COMPONENTS #define NB_COMPONENTS 1 #endif +layout(constant_id = 3) const uint32_t nb_blocks = NB_BLOCKS; +layout(constant_id = 4) const uint32_t nb_components = NB_COMPONENTS; + /* Padded by 1 row to avoid bank conflicts */ -shared float blocks[NB_BLOCKS][NB_COMPONENTS*8*(8 + 1)]; +shared float blocks[nb_blocks][nb_components*8*(8 + 1)]; const float idct_scale[64] = { 0.1250000000000000, 0.1733799806652684, 0.1633203706095471, 0.1469844503024199, @@ -117,3 +127,5 @@ void idct8(uint block, uint offset, uint stride) blocks[block][6*stride + offset] = u6; blocks[block][7*stride + offset] = u7; } + +#endif /* VULKAN_DCT_H */ diff --git a/libavcodec/vulkan/prores_raw_decode.comp b/libavcodec/vulkan/prores_raw_decode.comp.glsl similarity index 93% rename from libavcodec/vulkan/prores_raw_decode.comp rename to libavcodec/vulkan/prores_raw_decode.comp.glsl index 384046f891..8f0717a408 100644 --- a/libavcodec/vulkan/prores_raw_decode.comp +++ b/libavcodec/vulkan/prores_raw_decode.comp.glsl @@ -20,6 +20,30 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_scalar_block_layout : require + +#include "common.comp" + +struct TileData { + ivec2 pos; + uint offset; + uint size; +}; + +layout(set = 0, binding = 0) uniform 
writeonly uimage2D dst; +layout(set = 0, binding = 1, scalar) readonly buffer frame_data_buf { + TileData tile_data[]; +}; + +layout(push_constant, scalar) uniform pushConstants { + u8buf pkt_data; + ivec2 frame_size; + ivec2 tile_size; + uint8_t qmat[64]; +}; + #define COMP_ID (gl_LocalInvocationID.x) GetBitContext gb; diff --git a/libavcodec/vulkan/prores_raw_idct.comp b/libavcodec/vulkan/prores_raw_idct.comp.glsl similarity index 85% rename from libavcodec/vulkan/prores_raw_idct.comp rename to libavcodec/vulkan/prores_raw_idct.comp.glsl index c9850d17d7..64c234e78e 100644 --- a/libavcodec/vulkan/prores_raw_idct.comp +++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl @@ -20,6 +20,33 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_image_load_formatted : require + +#include "common.comp" +#include "dct.comp" + +struct TileData { + ivec2 pos; + uint offset; + uint size; +}; + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +layout(set = 0, binding = 0) uniform uimage2D dst; +layout(set = 0, binding = 1, scalar) readonly buffer frame_data_buf { + TileData tile_data[]; +}; + +layout(push_constant, scalar) uniform pushConstants { + u8buf pkt_data; + ivec2 frame_size; + ivec2 tile_size; + uint8_t qmat[64]; +}; + #define COMP_ID (gl_LocalInvocationID.z) #define BLOCK_ID (gl_LocalInvocationID.y) #define ROW_ID (gl_LocalInvocationID.x) diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c index aa2b698925..0746a38ddc 100644 --- a/libavcodec/vulkan_prores_raw.c +++ b/libavcodec/vulkan_prores_raw.c @@ -22,13 +22,13 @@ #include "hwaccel_internal.h" #include "prores_raw.h" -#include "libavutil/vulkan_spirv.h" #include "libavutil/mem.h" -extern const char *ff_source_common_comp; -extern const char *ff_source_dct_comp; -extern const char 
*ff_source_prores_raw_decode_comp; -extern const char *ff_source_prores_raw_idct_comp; +extern const unsigned char ff_prores_raw_decode_comp_spv_data[]; +extern const unsigned int ff_prores_raw_decode_comp_spv_len; + +extern const unsigned char ff_prores_raw_idct_comp_spv_data[]; +extern const unsigned int ff_prores_raw_idct_comp_spv_len; const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = { .codec_id = AV_CODEC_ID_PRORES_RAW, @@ -287,46 +287,19 @@ fail: static int add_common_data(AVCodecContext *avctx, FFVulkanContext *s, FFVulkanShader *shd, int writeonly) { - AVHWFramesContext *dec_frames_ctx; - dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, struct TileData { ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, uint offset; ); - GLSLC(1, uint size; ); - GLSLC(0, }; ); - GLSLC(0, ); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf pkt_data; ); - GLSLC(1, ivec2 frame_size; ); - GLSLC(1, ivec2 tile_size; ); - GLSLC(1, uint8_t qmat[64]; ); - GLSLC(0, }; ); - GLSLC(0, ); ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData), VK_SHADER_STAGE_COMPUTE_BIT); - FFVulkanDescriptorSetBinding *desc_set; - desc_set = (FFVulkanDescriptorSetBinding []) { + FFVulkanDescriptorSetBinding desc_set[] = { { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = writeonly ? 
"writeonly" : NULL, - .dimensions = 2, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, { - .name = "frame_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .mem_quali = "readonly", - .buf_content = "TileData tile_data[];", + .name = "frame_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, }; @@ -334,74 +307,62 @@ static int add_common_data(AVCodecContext *avctx, FFVulkanContext *s, } static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int version) + FFVkExecPool *pool, FFVulkanShader *shd, + int version) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - RET(ff_vk_shader_init(s, shd, "prores_raw", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2", - "GL_EXT_null_initializer" }, 3, - 4, 1, 1, 0)); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, + (uint32_t []) { 4, 1, 1 }, 0); - RET(add_common_data(avctx, s, shd, 1)); + add_common_data(avctx, s, shd, 1); - GLSLD(ff_source_prores_raw_decode_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_raw_decode_comp_spv_data, + ff_prores_raw_decode_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int version) + FFVkExecPool *pool, FFVulkanShader *shd, + int version) { int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; + 
uint32_t spec_data[5]; + VkSpecializationMapEntry spec_entries[5]; + VkSpecializationInfo spec_info = { + .pMapEntries = spec_entries, + .mapEntryCount = 2, + .pData = spec_data, + .dataSize = 2*sizeof(uint32_t), + }; - RET(ff_vk_shader_init(s, shd, "prores_raw", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 8, - version == 0 ? 8 : 16 /* Horizontal blocks */, - 4 /* Components */, - 0)); + spec_data[0] = version == 0 ? 8 : 16; /* nb_blocks */ + spec_entries[0].constantID = 3; + spec_entries[0].size = 4; + spec_entries[0].offset = 0; - RET(add_common_data(avctx, s, shd, 0)); + spec_data[1] = 4; /* nb_components */ + spec_entries[1].constantID = 4; + spec_entries[1].size = 4; + spec_entries[1].offset = 4; - GLSLC(0, #define NB_BLOCKS 16); - GLSLC(0, #define NB_COMPONENTS 4); - GLSLD(ff_source_dct_comp); + ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, &spec_info, + (uint32_t []) { 8, spec_data[0], spec_data[1] }, 0); - GLSLD(ff_source_prores_raw_idct_comp); + add_common_data(avctx, s, shd, 0); - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_raw_idct_comp_spv_data, + ff_prores_raw_idct_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; } @@ -423,12 +384,6 @@ static int vk_decode_prores_raw_init(AVCodecContext *avctx) FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; ProResRAWContext *prr = avctx->priv_data; - FFVkSPIRVCompiler *spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - err = ff_vk_decode_init(avctx); if (err < 0) return err; @@ -443,14 +398,12 @@ static int vk_decode_prores_raw_init(AVCodecContext *avctx) ctx->sd_ctx_free = 
&vk_decode_prores_raw_uninit; /* Setup decode shader */ - RET(init_decode_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &prv->decode, + RET(init_decode_shader(avctx, &ctx->s, &ctx->exec_pool, &prv->decode, prr->version)); - RET(init_idct_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &prv->idct, + RET(init_idct_shader(avctx, &ctx->s, &ctx->exec_pool, &prv->idct, prr->version)); fail: - spv->uninit(&spv); - return err; } -- 2.49.1 >From e3bd6dba621ecfd1fd4fb3ec4bc7444d81c6c47d Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Fri, 2 Jan 2026 13:41:27 +0100 Subject: [PATCH 12/12] vulkan_prores: generate SPIR-V at compile-time --- configure | 2 +- libavcodec/vulkan/Makefile | 5 +- ...prores_idct.comp => prores_idct.comp.glsl} | 38 +++- .../{prores_vld.comp => prores_vld.comp.glsl} | 38 ++++ libavcodec/vulkan_prores.c | 183 ++++++------------ 5 files changed, 136 insertions(+), 130 deletions(-) rename libavcodec/vulkan/{prores_idct.comp => prores_idct.comp.glsl} (76%) rename libavcodec/vulkan/{prores_vld.comp => prores_vld.comp.glsl} (92%) diff --git a/configure b/configure index 886ab7ba1e..060a0bded8 100755 --- a/configure +++ b/configure @@ -3380,7 +3380,7 @@ prores_videotoolbox_hwaccel_deps="videotoolbox" prores_videotoolbox_hwaccel_select="prores_decoder" prores_raw_vulkan_hwaccel_deps="vulkan" prores_raw_vulkan_hwaccel_select="prores_raw_decoder" -prores_vulkan_hwaccel_deps="vulkan spirv_library" +prores_vulkan_hwaccel_deps="vulkan" prores_vulkan_hwaccel_select="prores_decoder" vc1_d3d11va_hwaccel_deps="d3d11va" vc1_d3d11va_hwaccel_select="vc1_decoder" diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index c1f22cb8cc..093908b0b9 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -17,9 +17,8 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \ vulkan/prores_raw_idct.comp.spv.o -OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += 
vulkan/common.o vulkan/dct.o \ - vulkan/prores_vld.o \ - vulkan/prores_idct.o +OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/prores_vld.comp.spv.o \ + vulkan/prores_idct.comp.spv.o OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/dpx_unpack.o \ diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp.glsl similarity index 76% rename from libavcodec/vulkan/prores_idct.comp rename to libavcodec/vulkan/prores_idct.comp.glsl index 25431d61c1..c30e4983a1 100644 --- a/libavcodec/vulkan/prores_idct.comp +++ b/libavcodec/vulkan/prores_idct.comp.glsl @@ -16,10 +16,46 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_image_load_formatted : require + +#include "common.comp" +#include "dct.comp" + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +layout(constant_id = 6) const uint32_t interlaced = 0; + +layout(set = 0, binding = 0) readonly buffer quant_idx_buf { + uint8_t quant_idx[68400]; +}; +layout(set = 0, binding = 1) uniform uimage2D dst[3]; + +layout(push_constant, scalar) uniform pushConstants { + u8buf slice_data; + uint bitstream_size; + + uint16_t width; + uint16_t height; + uint16_t mb_width; + uint16_t mb_height; + uint16_t slice_width; + uint16_t slice_height; + uint8_t log2_slice_width; + uint8_t log2_chroma_w; + uint8_t depth; + uint8_t alpha_info; + uint8_t bottom_field; + + uint8_t qmat_luma [8*8]; + uint8_t qmat_chroma[8*8]; +}; + uint get_px(uint tex_idx, ivec2 pos) { #ifndef INTERLACED - return imageLoad(dst[tex_idx], pos).x; + return uint(imageLoad(dst[tex_idx], pos).x); #else return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field)).x; #endif diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp.glsl similarity index 92% rename from libavcodec/vulkan/prores_vld.comp 
rename to libavcodec/vulkan/prores_vld.comp.glsl index ab0dbf0116..8bf78389e2 100644 --- a/libavcodec/vulkan/prores_vld.comp +++ b/libavcodec/vulkan/prores_vld.comp.glsl @@ -16,6 +16,44 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#version 460 +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_scalar_block_layout : require + +#define GET_BITS_SMEM 4 +#include "common.comp" + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; +layout(constant_id = 3) const uint32_t interlaced = 0; + +layout(set = 0, binding = 0) readonly buffer slice_offsets_buf { + uint32_t slice_offsets[68401]; +}; +layout(set = 0, binding = 1) writeonly buffer quant_idx_buf { + uint8_t quant_idx[68400]; +}; +layout(set = 0, binding = 2) uniform writeonly uimage2D dst[3]; + +layout(push_constant, scalar) uniform pushConstants { + u8buf slice_data; + uint bitstream_size; + + uint16_t width; + uint16_t height; + uint16_t mb_width; + uint16_t mb_height; + uint16_t slice_width; + uint16_t slice_height; + uint8_t log2_slice_width; + uint8_t log2_chroma_w; + uint8_t depth; + uint8_t alpha_info; + uint8_t bottom_field; + + uint8_t qmat_luma [8*8]; + uint8_t qmat_chroma[8*8]; +}; + /** * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8) * According to the SMPTE document, abs(prev_dc_diff) should be used diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c index 7e7c2ace9c..4ab678668c 100644 --- a/libavcodec/vulkan_prores.c +++ b/libavcodec/vulkan_prores.c @@ -21,12 +21,12 @@ #include "hwaccel_internal.h" #include "libavutil/mem.h" #include "libavutil/vulkan.h" -#include "libavutil/vulkan_spirv.h" -extern const char *ff_source_common_comp; -extern const char *ff_source_dct_comp; -extern const char *ff_source_prores_vld_comp; -extern const char *ff_source_prores_idct_comp; +extern const unsigned char ff_prores_vld_comp_spv_data[]; +extern const unsigned int 
ff_prores_vld_comp_spv_len; + +extern const unsigned char ff_prores_idct_comp_spv_data[]; +extern const unsigned int ff_prores_idct_comp_spv_len; const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = { .codec_id = AV_CODEC_ID_PRORES, @@ -342,171 +342,115 @@ fail: return err; } -static int add_push_data(FFVulkanShader *shd) -{ - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_data; ); - GLSLC(1, uint bitstream_size; ); - GLSLC(0, ); - GLSLC(1, uint16_t width; ); - GLSLC(1, uint16_t height; ); - GLSLC(1, uint16_t mb_width; ); - GLSLC(1, uint16_t mb_height; ); - GLSLC(1, uint16_t slice_width; ); - GLSLC(1, uint16_t slice_height; ); - GLSLC(1, uint8_t log2_slice_width; ); - GLSLC(1, uint8_t log2_chroma_w; ); - GLSLC(1, uint8_t depth; ); - GLSLC(1, uint8_t alpha_info; ); - GLSLC(1, uint8_t bottom_field; ); - GLSLC(0, ); - GLSLC(1, uint8_t qmat_luma [8*8]; ); - GLSLC(1, uint8_t qmat_chroma[8*8]; ); - GLSLC(0, }; ); - - return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), - VK_SHADER_STAGE_COMPUTE_BIT); -} - static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int max_num_mbs, - int interlaced) + FFVkExecPool *pool, FFVulkanShader *shd, + int max_num_mbs, int interlaced) { int err; - AVHWFramesContext *dec_frames_ctx; - dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + uint32_t spec_data[4]; + VkSpecializationMapEntry spec_entries[4]; + VkSpecializationInfo spec_info = { + .pMapEntries = spec_entries, + .mapEntryCount = 1, + .pData = spec_data, + .dataSize = 1*sizeof(uint32_t), + }; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; + spec_data[0] = interlaced; + spec_entries[0].constantID = 3; + spec_entries[0].size = 4; + spec_entries[0].offset = 0; - RET(ff_vk_shader_init(s, shd, "prores_vld", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - 
"GL_EXT_buffer_reference2" }, 2, - 8, 8, 1, - 0)); + ff_vk_shader_load(shd, + VK_SHADER_STAGE_COMPUTE_BIT, &spec_info, + (uint32_t []) { 8, 8, 1 }, 0); - av_bprintf(&shd->src, "#define GET_BITS_SMEM %d\n", 4); - - if (interlaced) - av_bprintf(&shd->src, "#define INTERLACED\n"); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - RET(add_push_data(shd)); + ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); FFVulkanDescriptorSetBinding desc_set[] = { { .name = "slice_offsets_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "uint32_t slice_offsets", - .buf_elems = max_num_mbs + 1, }, { .name = "quant_idx_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint8_t quant_idx", - .buf_elems = max_num_mbs, }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = "writeonly", - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = 3, /* Always 3 separate planes */ }, }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); + ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0); - GLSLD(ff_source_prores_vld_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_vld_comp_spv_data, + ff_prores_vld_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return 0; } static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int max_num_mbs, - int interlaced) + 
FFVkExecPool *pool, FFVulkanShader *shd, + int max_num_mbs, int interlaced) { int err; - AVHWFramesContext *dec_frames_ctx; - dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + uint32_t spec_data[5]; + VkSpecializationMapEntry spec_entries[5]; + VkSpecializationInfo spec_info = { + .pMapEntries = spec_entries, + .mapEntryCount = 2, + .pData = spec_data, + .dataSize = 2*sizeof(uint32_t), + }; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; + spec_data[0] = 4*2; /* nb_blocks */ + spec_entries[0].constantID = 3; + spec_entries[0].size = 4; + spec_entries[0].offset = 0; - RET(ff_vk_shader_init(s, shd, "prores_idct", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 32, 2, 1, - 0)); + spec_data[1] = interlaced; + spec_entries[1].constantID = 5; + spec_entries[1].size = 4; + spec_entries[1].offset = 4; - if (interlaced) - av_bprintf(&shd->src, "#define INTERLACED\n"); + ff_vk_shader_load(shd, + VK_SHADER_STAGE_COMPUTE_BIT, &spec_info, + (uint32_t []) { 32, 2, 1 }, 0); - /* Common codec header */ - GLSLD(ff_source_common_comp); - - RET(add_push_data(shd)); + ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); FFVulkanDescriptorSetBinding desc_set[] = { { .name = "quant_idx_buf", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "uint8_t quant_idx", - .buf_elems = max_num_mbs, }, { .name = "dst", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .elems = 3, }, }; RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0)); - GLSLC(0, #define NB_BLOCKS 4*2); - GLSLD(ff_source_dct_comp); - - GLSLD(ff_source_prores_idct_comp); - - RET(spv->compile_shader(s, 
spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_link(s, shd, + ff_prores_idct_comp_spv_data, + ff_prores_idct_comp_spv_len, "main")); RET(ff_vk_shader_register_exec(s, pool, shd)); fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return 0; } @@ -529,17 +473,10 @@ static int vk_decode_prores_init(AVCodecContext *avctx) ProresContext *pr = avctx->priv_data; ProresVulkanDecodeContext *pv; - FFVkSPIRVCompiler *spv; int max_num_mbs, err; max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4); - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - err = ff_vk_decode_init(avctx); if (err < 0) return err; @@ -554,15 +491,11 @@ static int vk_decode_prores_init(AVCodecContext *avctx) ctx->sd_ctx_free = vk_decode_prores_uninit; RET(init_decode_shader(avctx, &ctx->s, &ctx->exec_pool, - spv, &pv->vld, max_num_mbs, pr->frame_type != 0)); + &pv->vld, max_num_mbs, pr->frame_type != 0)); RET(init_idct_shader(avctx, &ctx->s, &ctx->exec_pool, - spv, &pv->idct, max_num_mbs, pr->frame_type != 0)); - - err = 0; + &pv->idct, max_num_mbs, pr->frame_type != 0)); fail: - spv->uninit(&spv); - return err; } -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
