On Mon, Oct 6, 2025 at 10:31 PM Zhao Zhili via ffmpeg-devel <[email protected]> wrote: > > PR #20654 opened by Zhao Zhili (quink) > URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20654 > Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20654.patch > > > >From ab6fd7a273b88080e878c0c6dcf5e956cbff026e Mon Sep 17 00:00:00 2001 > From: Zhao Zhili <[email protected]> > Date: Mon, 6 Oct 2025 13:41:16 +0800 > Subject: [PATCH 1/3] avutil/cpu: add CPU feature flag for arm crc32 > > --- > libavutil/aarch64/cpu.c | 5 +++++ > libavutil/cpu.c | 1 + > libavutil/cpu.h | 1 + > libavutil/tests/cpu.c | 1 + > 4 files changed, 8 insertions(+) > > diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c > index e82c0f19ab..de0d709062 100644 > --- a/libavutil/aarch64/cpu.c > +++ b/libavutil/aarch64/cpu.c > @@ -24,6 +24,7 @@ > #include <stdint.h> > #include <sys/auxv.h> > > +#define HWCAP_AARCH64_CRC32 (1 << 7) > #define HWCAP_AARCH64_ASIMDDP (1 << 20) > #define HWCAP_AARCH64_SVE (1 << 22) > #define HWCAP2_AARCH64_SVE2 (1 << 1) > @@ -36,6 +37,8 @@ static int detect_flags(void) > unsigned long hwcap = ff_getauxval(AT_HWCAP); > unsigned long hwcap2 = ff_getauxval(AT_HWCAP2); > > + if (hwcap & HWCAP_AARCH64_CRC32) > + flags |= AV_CPU_FLAG_CRC32; > if (hwcap & HWCAP_AARCH64_ASIMDDP) > flags |= AV_CPU_FLAG_DOTPROD; > if (hwcap & HWCAP_AARCH64_SVE) > @@ -67,6 +70,8 @@ static int detect_flags(void) > flags |= AV_CPU_FLAG_DOTPROD; > if (have_feature("hw.optional.arm.FEAT_I8MM")) > flags |= AV_CPU_FLAG_I8MM; > + if (have_feature("hw.optional.armv8_crc32")) > + flags |= AV_CPU_FLAG_CRC32; > > return flags; > } > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > index 8f9b785ebc..d9753d58fa 100644 > --- a/libavutil/cpu.c > +++ b/libavutil/cpu.c > @@ -186,6 +186,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) > { "i8mm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM > }, .unit = "flags" }, > { "sve", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE > }, .unit = 
"flags" }, > { "sve2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2 > }, .unit = "flags" }, > + { "crc32", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CRC32 > }, .unit = "flags" }, > #elif ARCH_MIPS > { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI > }, .unit = "flags" }, > { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA > }, .unit = "flags" }, > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > index 5ef5da58eb..11d7e13b99 100644 > --- a/libavutil/cpu.h > +++ b/libavutil/cpu.h > @@ -74,6 +74,7 @@ > #define AV_CPU_FLAG_I8MM (1 << 9) > #define AV_CPU_FLAG_SVE (1 <<10) > #define AV_CPU_FLAG_SVE2 (1 <<11) > +#define AV_CPU_FLAG_CRC32 (1 <<12) > #define AV_CPU_FLAG_SETEND (1 <<16) > > #define AV_CPU_FLAG_MMI (1 << 0) > diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c > index fd2e32901d..a5058f891e 100644 > --- a/libavutil/tests/cpu.c > +++ b/libavutil/tests/cpu.c > @@ -48,6 +48,7 @@ static const struct { > { AV_CPU_FLAG_I8MM, "i8mm" }, > { AV_CPU_FLAG_SVE, "sve" }, > { AV_CPU_FLAG_SVE2, "sve2" }, > + { AV_CPU_FLAG_CRC32, "crc32" }, > #elif ARCH_ARM > { AV_CPU_FLAG_ARMV5TE, "armv5te" }, > { AV_CPU_FLAG_ARMV6, "armv6" }, > -- > 2.49.1 > > > >From cbdcdff2b87a05f21e477aa686b79ff49ce64f23 Mon Sep 17 00:00:00 2001 > From: Zhao Zhili <[email protected]> > Date: Mon, 6 Oct 2025 17:19:05 +0800 > Subject: [PATCH 2/3] avutil/crc: use arm64 crc32 instruction > > On rpi5 A76 > crc_32_ieee_le_c: 23410.8 ( 1.00x) > crc_32_ieee_le_crc32: 1072.7 (21.82x) > > On RK3566 A55 > crc_32_ieee_le_c: 28778.8 ( 1.00x) > crc_32_ieee_le_crc32: 2625.8 (10.96x) > --- > doc/APIchanges | 3 ++ > libavutil/aarch64/Makefile | 4 ++- > libavutil/aarch64/crc.S | 43 ++++++++++++++++++++++++++ > libavutil/aarch64/crc_init.c | 36 ++++++++++++++++++++++ > libavutil/crc.c | 29 +++++++++++++++++- > libavutil/crc.h | 25 ++++++++++++++++ > libavutil/crc_internal.h | 28 +++++++++++++++++ > libavutil/version.h | 2 +- > tests/checkasm/Makefile | 1 + > 
tests/checkasm/checkasm.c | 2 ++ > tests/checkasm/checkasm.h | 1 + > tests/checkasm/hash.c | 58 ++++++++++++++++++++++++++++++++++++ > 12 files changed, 229 insertions(+), 3 deletions(-) > create mode 100644 libavutil/aarch64/crc.S > create mode 100644 libavutil/aarch64/crc_init.c > create mode 100644 libavutil/crc_internal.h > create mode 100644 tests/checkasm/hash.c > > diff --git a/doc/APIchanges b/doc/APIchanges > index 6e7f5d2037..38e8d3f403 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -2,6 +2,9 @@ The last version increases of all libraries were on 2025-03-28 > > API changes, most recent first: > > +2025-10-xx - xxxxxxxxxx - lavu 60.14.100 - crc.h > + Add av_crc_get and av_crc2 for ASM optimization. > + > 2025-08-xx - xxxxxxxxxx - lavf 62.6.100 - oggparsevorbis.h oggparseopus.h > oggparseflac.h > Drop header packets from secondary chained ogg/{flac, opus, vorbis} streams > from demuxer output. > diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile > index 992e95e4df..825836831d 100644 > --- a/libavutil/aarch64/Makefile > +++ b/libavutil/aarch64/Makefile > @@ -1,8 +1,10 @@ > OBJS += aarch64/cpu.o \ > + aarch64/crc_init.o \ > aarch64/float_dsp_init.o \ > aarch64/tx_float_init.o \ > > -NEON-OBJS += aarch64/float_dsp_neon.o \ > +NEON-OBJS += aarch64/crc.o \ > + aarch64/float_dsp_neon.o \ > aarch64/tx_float_neon.o \ > > SVE-OBJS += aarch64/cpu_sve.o \ > diff --git a/libavutil/aarch64/crc.S b/libavutil/aarch64/crc.S > new file mode 100644 > index 0000000000..bca7a89c30 > --- /dev/null > +++ b/libavutil/aarch64/crc.S > @@ -0,0 +1,43 @@ > +/* > + * Copyright (c) 2025 Zhao Zhili <[email protected]> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +#include "asm.S" > + > +function ff_crc32_aarch64, export=1 > + bic x5, x3, #15 > + and x4, x3, #15 > + mov w0, w1 > + cbz x5, 2f > +1: > + ldp x6, x7, [x2], #16 > + subs x5, x5, #16 > + crc32x w0, w0, x6 > + crc32x w0, w0, x7 > + b.ne 1b > +2: > + cbz x4, 4f > +3: > + ldrb w5, [x2], #1 > + subs x4, x4, #1 > + crc32b w0, w0, w5 > + b.ne 3b > +4: > + ret > +endfunc > diff --git a/libavutil/aarch64/crc_init.c b/libavutil/aarch64/crc_init.c > new file mode 100644 > index 0000000000..a5a79f67e8 > --- /dev/null > +++ b/libavutil/aarch64/crc_init.c > @@ -0,0 +1,36 @@ > +/* > + * Copyright (c) 2025 Zhao Zhili <[email protected]> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> + */ > + > +#include "libavutil/aarch64/cpu.h" > +#include "libavutil/attributes.h" > +#include "libavutil/crc_internal.h" > + > +uint32_t ff_crc32_aarch64(const AVCRC *crctab, uint32_t crc, > + const uint8_t *buffer, size_t length); > + > +av_cold void ff_crc_get_aarch64(AVCRCId crc_id, AVCRCCtx *ctx) > +{ > + if (crc_id != AV_CRC_32_IEEE_LE) > + return; > + > + int cpu_flags = av_get_cpu_flags(); > + if (cpu_flags & AV_CPU_FLAG_CRC32) > + ctx->fn = ff_crc32_aarch64; > +} > diff --git a/libavutil/crc.c b/libavutil/crc.c > index 703b56f4e0..2bc45c2a76 100644 > --- a/libavutil/crc.c > +++ b/libavutil/crc.c > @@ -23,7 +23,7 @@ > #include "thread.h" > #include "avassert.h" > #include "bswap.h" > -#include "crc.h" > +#include "crc_internal.h" > #include "error.h" > > #if CONFIG_HARDCODED_TABLES > @@ -413,3 +413,30 @@ uint32_t av_crc(const AVCRC *ctx, uint32_t crc, > > return crc; > } > + > +int av_crc_get(AVCRCId crc_id, AVCRCCtx *ctx) > +{ > + if (!ctx) > + return AVERROR(EINVAL); > + > + ctx->crctab = NULL; > + ctx->fn = NULL; > +#if ARCH_AARCH64 > + ff_crc_get_aarch64(crc_id, ctx); > +#endif > + if (ctx->fn) > + return 0; > + > + ctx->crctab = av_crc_get_table(crc_id); > + if (!ctx->crctab) > + return AVERROR_BUG; > + ctx->fn = av_crc; > + > + return 0; > +} > + > +uint32_t av_crc2(AVCRCCtx *ctx, uint32_t crc, > + const uint8_t *buffer, size_t length) > +{ > + return ctx->fn(ctx->crctab, crc, buffer, length); > +} > diff --git a/libavutil/crc.h b/libavutil/crc.h > index 7f59812a18..0c544b53bb 100644 > --- a/libavutil/crc.h > +++ b/libavutil/crc.h > @@ -57,6 +57,12 @@ typedef enum { > AV_CRC_MAX, /*< Not part of public API! Do not use outside > libavutil. */ > }AVCRCId; > > +typedef struct AVCRCCtx { > + const AVCRC *crctab; > + uint32_t (*fn)(const AVCRC *crctab, uint32_t crc, const uint8_t *buffer, > + size_t length); > +} AVCRCCtx; > + > /** > * Initialize a CRC table. 
> * @param ctx must be an array of size sizeof(AVCRC)*257 or > sizeof(AVCRC)*1024 > @@ -95,6 +101,25 @@ const AVCRC *av_crc_get_table(AVCRCId crc_id); > uint32_t av_crc(const AVCRC *ctx, uint32_t crc, > const uint8_t *buffer, size_t length) av_pure; > > +/** > + * Get a AVCRCCtx depends on AVCRCId > + * @param crc_id ID of a standard CRC > + * @param ctx AVCRCCtx which will be set > + * @return < 0 on failure > + */ > +int av_crc_get(AVCRCId crc_id, AVCRCCtx *ctx); > + > +/** > + * Calculate the CRC of a block. > + * @param ctx AVCRCCtx get from av_crc_get() > + * @param crc CRC of previous blocks if any or initial value for CRC > + * @param buffer buffer whose CRC to calculate > + * @param length length of the buffer > + * @return CRC updated with the data from the given block > + */ > +uint32_t av_crc2(AVCRCCtx *ctx, uint32_t crc, > + const uint8_t *buffer, size_t length) av_pure; > + > /** > * @} > */ > diff --git a/libavutil/crc_internal.h b/libavutil/crc_internal.h > new file mode 100644 > index 0000000000..4b0d020f28 > --- /dev/null > +++ b/libavutil/crc_internal.h > @@ -0,0 +1,28 @@ > +/* > + * Copyright (c) 2025 Zhao Zhili <[email protected]> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> + */ > + > +#ifndef AVUTIL_CRC_INTERNAL_H > +#define AVUTIL_CRC_INTERNAL_H > + > +#include "libavutil/crc.h" > + > +void ff_crc_get_aarch64(AVCRCId crc_id, AVCRCCtx *ctx); > + > +#endif // AVUTIL_CRC_INTERNAL_H > diff --git a/libavutil/version.h b/libavutil/version.h > index 1099715076..176b99aef3 100644 > --- a/libavutil/version.h > +++ b/libavutil/version.h > @@ -79,7 +79,7 @@ > */ > > #define LIBAVUTIL_VERSION_MAJOR 60 > -#define LIBAVUTIL_VERSION_MINOR 13 > +#define LIBAVUTIL_VERSION_MINOR 14 > #define LIBAVUTIL_VERSION_MICRO 100 > > #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > index 1589a15e2f..a255c8e742 100644 > --- a/tests/checkasm/Makefile > +++ b/tests/checkasm/Makefile > @@ -85,6 +85,7 @@ CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS) > # libavutil tests > AVUTILOBJS += aes.o > AVUTILOBJS += av_tx.o > +AVUTILOBJS += hash.o > AVUTILOBJS += fixed_dsp.o > AVUTILOBJS += float_dsp.o > AVUTILOBJS += lls.o > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index e59d366f2b..b225259443 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -320,6 +320,7 @@ static const struct { > #endif > #if CONFIG_AVUTIL > { "aes", checkasm_check_aes }, > + { "hash", checkasm_check_hash }, > { "fixed_dsp", checkasm_check_fixed_dsp }, > { "float_dsp", checkasm_check_float_dsp }, > { "lls", checkasm_check_lls }, > @@ -341,6 +342,7 @@ static const struct { > { "I8MM", "i8mm", AV_CPU_FLAG_I8MM }, > { "SVE", "sve", AV_CPU_FLAG_SVE }, > { "SVE2", "sve2", AV_CPU_FLAG_SVE2 }, > + { "CRC32", "crc32", AV_CPU_FLAG_CRC32 }, > #elif ARCH_ARM > { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE }, > { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 }, > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index eda806e870..6f24158bf4 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -105,6 +105,7 @@ void 
checkasm_check_h264chroma(void); > void checkasm_check_h264dsp(void); > void checkasm_check_h264pred(void); > void checkasm_check_h264qpel(void); > +void checkasm_check_hash(void); > void checkasm_check_hevc_add_res(void); > void checkasm_check_hevc_deblock(void); > void checkasm_check_hevc_idct(void); > diff --git a/tests/checkasm/hash.c b/tests/checkasm/hash.c > new file mode 100644 > index 0000000000..5169636ef1 > --- /dev/null > +++ b/tests/checkasm/hash.c > @@ -0,0 +1,58 @@ > +/* > + * Copyright (c) 2025 Zhao Zhili <[email protected]> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +#include "checkasm.h" > +#include "libavutil/crc.h" > + > +#define BUF_SIZE (8192 + 11) > + > +static void check_crc(void) > +{ > + uint8_t buf[BUF_SIZE]; > + > + for (int i = 0; i < sizeof(buf); i++) > + buf[i] = rnd(); > + > + AVCRCCtx ctx; > + uint32_t crc0 = UINT32_MAX; > + uint32_t crc1 = UINT32_MAX; > + av_crc_get(AV_CRC_32_IEEE_LE, &ctx); > + > + declare_func(uint32_t, const AVCRC *crctab, uint32_t crc, > + const uint8_t *buffer, size_t length); > + /* Leave crctab as NULL is an optimization for real usecase, but doesn't > + * work with call_ref. 
> + */ > + if (!ctx.crctab) > + ctx.crctab = av_crc_get_table(AV_CRC_32_IEEE_LE); > + if (check_func(ctx.fn, "crc_32_ieee_le")) { > + crc0 = call_ref(ctx.crctab, crc0, buf, sizeof(buf)); > + crc1 = call_new(ctx.crctab, crc1, buf, sizeof(buf)); > + if (crc0 != crc1) > + fail(); > + bench_new(ctx.crctab, UINT32_MAX, buf, sizeof(buf)); > + } > +} > + > +void checkasm_check_hash(void) > +{ > + check_crc(); > + report("crc"); > +} > -- > 2.49.1 > > > >From 13ba872d1039eecd0457c3e30e9d53f043ed85e9 Mon Sep 17 00:00:00 2001 > From: Zhao Zhili <[email protected]> > Date: Mon, 6 Oct 2025 18:41:02 +0800 > Subject: [PATCH 3/3] avutil/hash: Use new crc API > > --- > libavutil/hash.c | 13 ++++++++++--- > 1 file changed, 10 insertions(+), 3 deletions(-) > > diff --git a/libavutil/hash.c b/libavutil/hash.c > index fbc24194de..b29de10551 100644 > --- a/libavutil/hash.c > +++ b/libavutil/hash.c > @@ -66,7 +66,7 @@ enum hashtype { > typedef struct AVHashContext { > void *ctx; > enum hashtype type; > - const AVCRC *crctab; > + AVCRCCtx crcctx; > uint32_t crc; > } AVHashContext; > > @@ -137,7 +137,14 @@ int av_hash_alloc(AVHashContext **ctx, const char *name) > case SHA512_256: > case SHA384: > case SHA512: res->ctx = av_sha512_alloc(); break; > - case CRC32: res->crctab = av_crc_get_table(AV_CRC_32_IEEE_LE); break; > + case CRC32: { > + int err = av_crc_get(AV_CRC_32_IEEE_LE, &res->crcctx); > + if (err < 0) { > + av_free(res); > + return err; > + } > + } > + break; > case ADLER32: break; > } > if (i != ADLER32 && i != CRC32 && !res->ctx) { > @@ -185,7 +192,7 @@ void av_hash_update(AVHashContext *ctx, const uint8_t > *src, size_t len) > case SHA512_256: > case SHA384: > case SHA512: av_sha512_update(ctx->ctx, src, len); break; > - case CRC32: ctx->crc = av_crc(ctx->crctab, ctx->crc, src, len); break; > + case CRC32: ctx->crc = av_crc2(&ctx->crcctx, ctx->crc, src, len); > break; > case ADLER32: ctx->crc = av_adler32_update(ctx->crc, src, len); break; > } > } > --
Just FYI: https://github.com/corsix/fast-crc32 Using the crc32 instruction by itself isn't the fastest way to compute a CRC32. Kieran _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
