> The last iteration of this patchset claimed 2.5m for the software
> encoder vs 30s hardware. The software performance improvement seems
> small compared to what I expected, yet I am surprised about the hardware
> slowdown (presuming it was the same file). Was the switch to the lut
> based writing of codes not beneficial?

It is not the same video file. The previous description was for a 1080p
video; this one is 3440x1440 (between 1440p and 4K). I wanted to put more
stress on the encoder to measure the new performance gains.

> You don't allow the 9_7 wavelet here (intentionally?)
Yes, this is intentional: the 9_7 wavelet is not implemented in the Vulkan
encoder. This is also why I couldn't unify this array as you mentioned before.

Στις Δευ 19 Μαΐ 2025 στις 8:09 μ.μ., ο/η Andreas Rheinhardt
<andreas.rheinha...@outlook.com> έγραψε:
>
> IndecisiveTurtle:
> > From: IndecisiveTurtle <geoste...@gmail.com>
> >
> > Performance wise, encoding a 3440x1440 1-minute video is performed in about 
> > 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it 
> > takes about 1.3 minutes on my NVIDIA GTX 1650
>
> The last iteration of this patchset claimed 2.5m for the software
> encoder vs 30s hardware. The software performance improvement seems
> small compared to what I expected, yet I am surprised about the hardware
> slowdown (presuming it was the same file). Was the switch to the lut
> based writing of codes not beneficial?
>
> >
> > Haar shader has a subgroup optimized variant that applies when configured 
> > wavelet depth allows it
> > ---
> >  configure                                    |   1 +
> >  libavcodec/Makefile                          |   3 +
> >  libavcodec/allcodecs.c                       |   1 +
> >  libavcodec/vc2enc_vulkan.c                   | 775 +++++++++++++++++++
> >  libavcodec/vulkan/vc2_dwt_haar.comp          |  82 ++
> >  libavcodec/vulkan/vc2_dwt_haar_subgroup.comp |  75 ++
> >  libavcodec/vulkan/vc2_dwt_hor_legall.comp    |  82 ++
> >  libavcodec/vulkan/vc2_dwt_upload.comp        |  96 +++
> >  libavcodec/vulkan/vc2_dwt_ver_legall.comp    |  78 ++
> >  libavcodec/vulkan/vc2_encode.comp            | 159 ++++
> >  libavcodec/vulkan/vc2_slice_sizes.comp       | 170 ++++
> >  11 files changed, 1522 insertions(+)
> >  create mode 100644 libavcodec/vc2enc_vulkan.c
> >  create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp
> >  create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp
> >  create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp
> >  create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp
> >  create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp
> >  create mode 100644 libavcodec/vulkan/vc2_encode.comp
> >  create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp
> >
>
>
> > +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> > +static const AVOption vc2enc_options[] = {
> > +    {"tolerance",     "Max undershoot in percent", offsetof(VC2EncContext, 
> > tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, 
> > .unit = "tolerance"},
> > +    {"slice_width",   "Slice width",  offsetof(VC2EncContext, 
> > slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = 
> > "slice_width"},
> > +    {"slice_height",  "Slice height", offsetof(VC2EncContext, 
> > slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = 
> > "slice_height"},
> > +    {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, 
> > wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = 
> > "wavelet_depth"},
> > +    {"wavelet_type",  "Transform type",  offsetof(VC2EncContext, 
> > wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, 
> > VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"},
>
> You don't allow the 9_7 wavelet here (intentionally?), but then you
> should restrict the range to disallow the value 0 (== VC2_TRANSFORM_9_7).
>
> > +        {"5_3",          "LeGall (5,3)",            0, AV_OPT_TYPE_CONST, 
> > {.i64 = VC2_TRANSFORM_5_3},    INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = 
> > "wavelet_idx"},
> > +        {"haar",         "Haar (with shift)",       0, AV_OPT_TYPE_CONST, 
> > {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = 
> > "wavelet_idx"},
> > +        {"haar_noshift", "Haar (without shift)",    0, AV_OPT_TYPE_CONST, 
> > {.i64 = VC2_TRANSFORM_HAAR},   INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = 
> > "wavelet_idx"},
> > +    {"qm", "Custom quantization matrix", offsetof(VC2EncContext, 
> > quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, 
> > VC2ENC_FLAGS, .unit = "quant_matrix"},
> > +        {"default",   "Default from the specifications", 0, 
> > AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, 
> > .unit = "quant_matrix"},
> > +        {"color",     "Prevents low bitrate discoloration", 0, 
> > AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, 
> > .unit = "quant_matrix"},
> > +        {"flat",      "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = 
> > VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"},
> > +    {NULL}
> > +};
> > +
> > +static const AVClass vc2enc_class = {
> > +    .class_name = "vc2_vulkan_encoder",
> > +    .category = AV_CLASS_CATEGORY_ENCODER,
> > +    .option = vc2enc_options,
> > +    .item_name = av_default_item_name,
> > +    .version = LIBAVUTIL_VERSION_INT
> > +};
> > +
> > +static const FFCodecDefault vc2enc_defaults[] = {
> > +    { "b",              "600000000"   },
> > +    { NULL },
> > +};
> > +
> > +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = {
>
> Should not use ff_ prefix.
>
>
> > +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
> > +    HW_CONFIG_ENCODER_DEVICE(NONE,  VULKAN),
> > +    NULL,
> > +};
> > +
> > +const FFCodec ff_vc2_vulkan_encoder = {
> > +    .p.name         = "vc2_vulkan",
> > +    CODEC_LONG_NAME("SMPTE VC-2"),
> > +    .p.type         = AVMEDIA_TYPE_VIDEO,
> > +    .p.id           = AV_CODEC_ID_DIRAC,
> > +    .p.capabilities = AV_CODEC_CAP_HARDWARE,
> > +    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> > +    .priv_data_size = sizeof(VC2EncVulkanContext),
> > +    .init           = vc2_encode_init,
> > +    .close          = vc2_encode_end,
> > +    FF_CODEC_ENCODE_CB(vc2_encode_frame),
> > +    .p.priv_class   = &vc2enc_class,
> > +    .defaults       = vc2enc_defaults,
> > +    CODEC_PIXFMTS(AV_PIX_FMT_VULKAN),
> > +    .hw_configs     = ff_vc2_hw_configs,
> > +};
> > diff --git a/libavcodec/vulkan/vc2_encode.comp 
> > b/libavcodec/vulkan/vc2_encode.comp
> > new file mode 100644
> > index 0000000000..4d8adcca61
> > --- /dev/null
> > +++ b/libavcodec/vulkan/vc2_encode.comp
> > @@ -0,0 +1,159 @@
> > +/*
> > + * VC2 codec
> > + *
> > + * Copyright (c) 2025 raphaelthegreat <geoste...@gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
> > 02110-1301 USA
> > + */
> > +
> > +#extension GL_EXT_shader_explicit_arithmetic_types : require
> > +#extension GL_EXT_scalar_block_layout : require
> > +#extension GL_EXT_buffer_reference : require
> > +#extension GL_EXT_debug_printf : require
> > +
> > +#define MAX_DWT_LEVELS (5)
> > +
> > +layout(push_constant, scalar) uniform ComputeInfo {
> > +    u8buf bytestream;
> > +    ivec2 num_slices;
> > +    int wavelet_depth;
> > +    int size_scaler;
> > +    int prefix_bytes;
> > +};
> > +
> > +void put_vc2_ue_uint(inout PutBitContext pb, uint val)
> > +{
> > +    uint32_t pbits = 1;
> > +    int bits = 1;
> > +
> > +    ++val;
> > +
> > +    while ((val >> 8) != 0)
> > +    {
> > +        pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits;
> > +        val >>= 8;
> > +        bits += 16;
> > +    }
> > +
> > +    pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits;
> > +    bits  += golomb_len_tab[val];
> > +    put_bits(pb, bits, pbits);
>
> I see you switched to a lut based approach; yet you use 32 bits,
> similarly to what the software decoder did before
> af9935835335cae1ae5a4ec7fc14c1b5e25c1f2d. Can you guarantee that the
> encoded coefficients fit into 32bits? Is this a requirement/consequence
> of the spec?
>
> > +}
> > +
> > +int quants[MAX_DWT_LEVELS][4];
> > +
> > +int subband_coord(int index, int h, int lvl)
> > +{
> > +    int coord = index;
> > +    coord <<= 1;
> > +    coord |= h;
> > +    coord <<= (wavelet_depth-lvl-1);
> > +    return coord;
> > +}
> > +
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to