> The last iteration of this patchset claimed 2.5m for the software encoder vs 30s hardware. The software performance improvement seems small compared to what I expected, yet I am surprised about the hardware slowdown (presuming it was the same file). Was the switch to the lut based writing of codes not beneficial?
It is not the same video file. The last description was for a 1080p video; this one is between 1440p and 4K. I wanted to put more stress on the encoder to test the new performance gains. > You don't allow the 9_7 wavelet here (intentionally?) Yes, that is intentional: the 9_7 wavelet is not implemented in the Vulkan encoder. This is also why I couldn't unify this array as you mentioned before. Στις Δευ 19 Μαΐ 2025 στις 8:09 μ.μ., ο/η Andreas Rheinhardt <andreas.rheinha...@outlook.com> έγραψε: > > IndecisiveTurtle: > > From: IndecisiveTurtle <geoste...@gmail.com> > > > > Performance wise, encoding a 3440x1440 1-minute video is performed in about > > 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it > > takes about 1.3 minutes on my NVIDIA GTX 1650 > > The last iteration of this patchset claimed 2.5m for the software > encoder vs 30s hardware. The software performance improvement seems > small compared to what I expected, yet I am surprised about the hardware > slowdown (presuming it was the same file). Was the switch to the lut > based writing of codes not beneficial? 
> > > > > Haar shader has a subgroup optimized variant that applies when configured > > wavelet depth allows it > > --- > > configure | 1 + > > libavcodec/Makefile | 3 + > > libavcodec/allcodecs.c | 1 + > > libavcodec/vc2enc_vulkan.c | 775 +++++++++++++++++++ > > libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ > > libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ > > libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ > > libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ > > libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ > > libavcodec/vulkan/vc2_encode.comp | 159 ++++ > > libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ > > 11 files changed, 1522 insertions(+) > > create mode 100644 libavcodec/vc2enc_vulkan.c > > create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp > > create mode 100644 libavcodec/vulkan/vc2_encode.comp > > create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp > > > > > > +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > > +static const AVOption vc2enc_options[] = { > > + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, > > tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, > > .unit = "tolerance"}, > > + {"slice_width", "Slice width", offsetof(VC2EncContext, > > slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = > > "slice_width"}, > > + {"slice_height", "Slice height", offsetof(VC2EncContext, > > slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = > > "slice_height"}, > > + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, > > wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = > > "wavelet_depth"}, > > + {"wavelet_type", "Transform 
type", offsetof(VC2EncContext, > > wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, > > VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > > You don't allow the 9_7 wavelet here (intentionally?), but then you > should restrict the range to disallow the value 0 (== VC2_TRANSFORM_9_7). > > > + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, > > {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = > > "wavelet_idx"}, > > + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, > > {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = > > "wavelet_idx"}, > > + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, > > {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = > > "wavelet_idx"}, > > + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, > > quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, > > VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {"default", "Default from the specifications", 0, > > AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, > > .unit = "quant_matrix"}, > > + {"color", "Prevents low bitrate discoloration", 0, > > AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, > > .unit = "quant_matrix"}, > > + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = > > VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {NULL} > > +}; > > + > > +static const AVClass vc2enc_class = { > > + .class_name = "vc2_vulkan_encoder", > > + .category = AV_CLASS_CATEGORY_ENCODER, > > + .option = vc2enc_options, > > + .item_name = av_default_item_name, > > + .version = LIBAVUTIL_VERSION_INT > > +}; > > + > > +static const FFCodecDefault vc2enc_defaults[] = { > > + { "b", "600000000" }, > > + { NULL }, > > +}; > > + > > +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = { > > Should not use ff_ prefix. 
> > > > + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), > > + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), > > + NULL, > > +}; > > + > > +const FFCodec ff_vc2_vulkan_encoder = { > > + .p.name = "vc2_vulkan", > > + CODEC_LONG_NAME("SMPTE VC-2"), > > + .p.type = AVMEDIA_TYPE_VIDEO, > > + .p.id = AV_CODEC_ID_DIRAC, > > + .p.capabilities = AV_CODEC_CAP_HARDWARE, > > + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, > > + .priv_data_size = sizeof(VC2EncVulkanContext), > > + .init = vc2_encode_init, > > + .close = vc2_encode_end, > > + FF_CODEC_ENCODE_CB(vc2_encode_frame), > > + .p.priv_class = &vc2enc_class, > > + .defaults = vc2enc_defaults, > > + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), > > + .hw_configs = ff_vc2_hw_configs, > > +}; > > diff --git a/libavcodec/vulkan/vc2_encode.comp > > b/libavcodec/vulkan/vc2_encode.comp > > new file mode 100644 > > index 0000000000..4d8adcca61 > > --- /dev/null > > +++ b/libavcodec/vulkan/vc2_encode.comp > > @@ -0,0 +1,159 @@ > > +/* > > + * VC2 codec > > + * > > + * Copyright (c) 2025 raphaelthegreat <geoste...@gmail.com> > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > > 02110-1301 USA > > + */ > > + > > +#extension GL_EXT_shader_explicit_arithmetic_types : require > > +#extension GL_EXT_scalar_block_layout : require > > +#extension GL_EXT_buffer_reference : require > > +#extension GL_EXT_debug_printf : require > > + > > +#define MAX_DWT_LEVELS (5) > > + > > +layout(push_constant, scalar) uniform ComputeInfo { > > + u8buf bytestream; > > + ivec2 num_slices; > > + int wavelet_depth; > > + int size_scaler; > > + int prefix_bytes; > > +}; > > + > > +void put_vc2_ue_uint(inout PutBitContext pb, uint val) > > +{ > > + uint32_t pbits = 1; > > + int bits = 1; > > + > > + ++val; > > + > > + while ((val >> 8) != 0) > > + { > > + pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits; > > + val >>= 8; > > + bits += 16; > > + } > > + > > + pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits; > > + bits += golomb_len_tab[val]; > > + put_bits(pb, bits, pbits); > > I see you switched to a lut based approach; yet you use 32 bits, > similarly to what the software decoder did before > af9935835335cae1ae5a4ec7fc14c1b5e25c1f2d. Can you guarantee that the > encoded coefficients fit into 32bits? Is this a requirement/consequence > of the spec? > > > +} > > + > > +int quants[MAX_DWT_LEVELS][4]; > > + > > +int subband_coord(int index, int h, int lvl) > > +{ > > + int coord = index; > > + coord <<= 1; > > + coord |= h; > > + coord <<= (wavelet_depth-lvl-1); > > + return coord; > > +} > > + > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".