On Thu, Apr 2, 2026 at 11:17 PM Nathan Bossart <[email protected]> wrote: > > On Thu, Apr 02, 2026 at 10:53:24AM -0500, Nathan Bossart wrote: > > I think the new pg_comp_crc32_choose() is infinitely recursing on macOS > > because USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK is not defined but > > pg_crc32c_armv8_available() returns false. If I trace through that > > function, I see that it's going straight to the > > > > #else > > return false; > > #endif > > > > at the end. And sure enough, both HAVE_ELF_AUX_INFO and HAVE_GETAUXVAL
Ah of course. > > aren't defined in pg_config.h. I think we might need to use sysctlbyname() > > to determine PMULL support on macOS, but at this stage of the development > > cycle, I would probably lean towards just compiling in the sb8 > > implementation. > > Hm. On second thought, that probably regresses macOS builds because it was > presumably using the armv8 path without runtime checks before... I went with the following for v5, and it passes macOS on my GitHub CI: + /* set fallbacks */ +#ifdef USE_ARMV8_CRC32C + /* On e.g. MacOS, our runtime feature detection doesn't work */ + pg_comp_crc32c = pg_comp_crc32c_armv8; +#else + pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif + [...crc and pmull checks] That should keep scalar hardware support working, but now it'll only use direct calls for constant inputs. I also did some benchmarking on an ARM Neoverse N1 / gcc 8.3 (attached). There the vector loop still works well all the way down to the minimum input size of 64 bytes, and on long inputs it's almost twice as fast as scalar. For reproducibility, I slightly modified the benchmark we used last year, to make sure the input is aligned (attached but not for CI). In the end, I want to add a length check so that inputs smaller than 80 bytes go straight to the scalar path. Above 80, after alignment adjustments in the preamble, that still guarantees at least one loop iteration in the vector path. -- John Naylor Amazon Web Services
master: 32 latency average = 9.758 ms latency average = 9.449 ms latency average = 9.475 ms 64 latency average = 12.131 ms latency average = 12.276 ms latency average = 12.200 ms 96 latency average = 14.837 ms latency average = 14.834 ms latency average = 14.844 ms 128 latency average = 17.589 ms latency average = 17.499 ms latency average = 17.516 ms 160 latency average = 20.177 ms latency average = 20.292 ms latency average = 20.176 ms 192 latency average = 22.895 ms latency average = 22.879 ms latency average = 22.860 ms 224 latency average = 25.625 ms latency average = 25.609 ms latency average = 25.600 ms 256 latency average = 28.206 ms latency average = 28.213 ms latency average = 28.275 ms 8192 latency average = 698.934 ms latency average = 712.760 ms latency average = 700.519 ms v5: 32 latency average = 10.099 ms latency average = 10.241 ms latency average = 10.209 ms 64 latency average = 10.260 ms latency average = 10.232 ms latency average = 10.220 ms 96 latency average = 12.234 ms latency average = 12.297 ms latency average = 12.475 ms 128 latency average = 13.149 ms latency average = 13.380 ms latency average = 13.093 ms 160 latency average = 14.228 ms latency average = 13.829 ms latency average = 14.450 ms 192 latency average = 15.383 ms latency average = 15.171 ms latency average = 15.173 ms 224 latency average = 16.908 ms latency average = 16.746 ms latency average = 16.546 ms 256 latency average = 18.130 ms latency average = 18.271 ms latency average = 18.150 ms 8192 latency average = 410.864 ms latency average = 408.692 ms latency average = 401.145 ms
test-crc.sh
Description: application/shellscript
From 8adcb9c0305bffb1ce33cab3e4ceec984d72b351 Mon Sep 17 00:00:00 2001 From: John Naylor <[email protected]> Date: Tue, 31 Mar 2026 17:40:38 +0700 Subject: [PATCH v5] Compute CRC32C on ARM using the Crypto Extension where available --- config/c-compiler.m4 | 41 ++++++++++ configure | 67 +++++++++++++++- configure.ac | 13 ++- meson.build | 33 ++++++++ src/include/pg_config.h.in | 3 + src/include/port/pg_crc32c.h | 22 ++++-- src/port/meson.build | 1 + src/port/pg_crc32c_armv8.c | 127 ++++++++++++++++++++++++++++++ src/port/pg_crc32c_armv8_choose.c | 38 ++++++++- 9 files changed, 334 insertions(+), 11 deletions(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 629572ee350..f8a9a69f20a 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -759,6 +759,47 @@ fi undefine([Ac_cachevar])dnl ])# PGAC_ARMV8_CRC32C_INTRINSICS +# PGAC_ARM_PLMULL +# --------------------------- +# Check if the compiler supports Arm CRYPTO PMULL (carryless multiplication) +# instructions used for vectorized CRC. +# +# If the instructions are supported, sets pgac_arm_pmull. 
+AC_DEFUN([PGAC_ARM_PLMULL], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm_pmull_$1])])dnl +AC_CACHE_CHECK([for pmull and pmull2], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS" +AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h> +#include <arm_neon.h> +uint64x2_t a; +uint64x2_t b; +uint64x2_t c; +uint64x2_t r1; +uint64x2_t r2; + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("+crypto"))) + #endif + static int pmull_test(void) + { + __asm("pmull %0.1q, %2.1d, %3.1d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r1), "+w"(c):"w"(a), "w"(b)); + __asm("pmull2 %0.1q, %2.2d, %3.2d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r2), "+w"(c):"w"(a), "w"(b)); + + r1 = veorq_u64(r1, r2); + /* return computed value, to prevent the above being optimized away */ + return (int) vgetq_lane_u64(r1, 0); + }], + [return pmull_test();])], + [Ac_cachevar=yes], + [Ac_cachevar=no]) +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + pgac_arm_pmull=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_ARM_PLMULL + # PGAC_LOONGARCH_CRC32C_INTRINSICS # --------------------------- # Check if the compiler supports the LoongArch CRCC instructions, using diff --git a/configure b/configure index fe22bc71d0c..a1ed54d2439 100755 --- a/configure +++ b/configure @@ -18314,7 +18314,7 @@ $as_echo "SSE 4.2 with runtime check" >&6; } $as_echo "#define USE_ARMV8_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_armv8.o" + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_armv8_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions" >&5 $as_echo "ARMv8 CRC instructions" >&6; } else @@ -18399,6 +18399,61 @@ if test x"$pgac_cv_avx512_pclmul_intrinsics" = x"yes"; then pgac_avx512_pclmul_intrinsics=yes fi +else + if test x"$host_cpu" = x"aarch64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pmull and pmull2" >&5 +$as_echo_n "checking for pmull and pmull2... 
" >&6; } +if ${pgac_cv_arm_pmull_+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <arm_acle.h> +#include <arm_neon.h> +uint64x2_t a; +uint64x2_t b; +uint64x2_t c; +uint64x2_t r1; +uint64x2_t r2; + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("+crypto"))) + #endif + static int pmull_test(void) + { + __asm("pmull %0.1q, %2.1d, %3.1d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r1), "+w"(c):"w"(a), "w"(b)); + __asm("pmull2 %0.1q, %2.2d, %3.2d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r2), "+w"(c):"w"(a), "w"(b)); + + r1 = veorq_u64(r1, r2); + /* return computed value, to prevent the above being optimized away */ + return (int) vgetq_lane_u64(r1, 0); + } +int +main () +{ +return pmull_test(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_arm_pmull_=yes +else + pgac_cv_arm_pmull_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm_pmull_" >&5 +$as_echo "$pgac_cv_arm_pmull_" >&6; } +if test x"$pgac_cv_arm_pmull_" = x"yes"; then + pgac_arm_pmull=yes +fi + + fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for vectorized CRC-32C" >&5 @@ -18410,8 +18465,16 @@ $as_echo "#define USE_AVX512_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: result: AVX-512 with runtime check" >&5 $as_echo "AVX-512 with runtime check" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 + if test x"$pgac_arm_pmull" = x"yes"; then + +$as_echo "#define USE_PMULL_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: CRYPTO PMULL with runtime check" >&5 +$as_echo "CRYPTO PMULL with runtime check" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 $as_echo 
"none" >&6; } + fi fi # Select semaphore implementation type. diff --git a/configure.ac b/configure.ac index 6873b7546dd..afea6118eb1 100644 --- a/configure.ac +++ b/configure.ac @@ -2268,7 +2268,7 @@ else else if test x"$USE_ARMV8_CRC32C" = x"1"; then AC_DEFINE(USE_ARMV8_CRC32C, 1, [Define to 1 to use ARMv8 CRC Extension.]) - PG_CRC32C_OBJS="pg_crc32c_armv8.o" + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_armv8_choose.o" AC_MSG_RESULT(ARMv8 CRC instructions) else if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then @@ -2295,6 +2295,10 @@ AC_SUBST(PG_CRC32C_OBJS) # if test x"$host_cpu" = x"x86_64"; then PGAC_AVX512_PCLMUL_INTRINSICS() +else + if test x"$host_cpu" = x"aarch64"; then + PGAC_ARM_PLMULL() + fi fi AC_MSG_CHECKING([for vectorized CRC-32C]) @@ -2302,7 +2306,12 @@ if test x"$pgac_avx512_pclmul_intrinsics" = x"yes"; then AC_DEFINE(USE_AVX512_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 CRC algorithms with a runtime check.]) AC_MSG_RESULT(AVX-512 with runtime check) else - AC_MSG_RESULT(none) + if test x"$pgac_arm_pmull" = x"yes"; then + AC_DEFINE(USE_PMULL_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Arm PMULL CRC algorithms with a runtime check.]) + AC_MSG_RESULT(CRYPTO PMULL with runtime check) + else + AC_MSG_RESULT(none) + fi fi # Select semaphore implementation type. diff --git a/meson.build b/meson.build index 6bc74c2ba79..6d7f697583f 100644 --- a/meson.build +++ b/meson.build @@ -2720,6 +2720,39 @@ int main(void) have_optimized_crc = true endif + # Check if the compiler supports Arm CRYPTO PMULL (carryless multiplication) + # instructions used for vectorized CRC. 
+ prog = ''' +#include <arm_acle.h> +#include <arm_neon.h> +uint64x2_t a; +uint64x2_t b; +uint64x2_t c; + +#if defined(__has_attribute) && __has_attribute (target) +__attribute__((target("+crypto"))) +#endif +int main(void) +{ + uint64x2_t r1; + uint64x2_t r2; + + __asm("pmull %0.1q, %2.1d, %3.1d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r1), "+w"(c):"w"(a), "w"(b)); + __asm("pmull2 %0.1q, %2.2d, %3.2d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r2), "+w"(c):"w"(a), "w"(b)); + + r1 = veorq_u64(r1, r2); + /* return computed value, to prevent the above being optimized away */ + return (int) vgetq_lane_u64(r1, 0); +} +''' + + if cc.links(prog, + name: 'CRYPTO CRC32C', + args: test_c_args) + # Use ARM CRYPTO Extension, with runtime check + cdata.set('USE_PMULL_CRC32C_WITH_RUNTIME_CHECK', 1) + endif + elif host_cpu == 'loongarch64' prog = ''' diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index d8d61918aff..dbc97c565a3 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -729,6 +729,9 @@ /* Define to 1 to build with PAM support. (--with-pam) */ #undef USE_PAM +/* Define to 1 to use Arm PMULL CRC algorithms with a runtime check. */ +#undef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK + /* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ #undef USE_SLICING_BY_8_CRC32C diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 1f8e837d119..10518614664 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -111,13 +111,22 @@ extern pg_crc32c pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t l #endif #elif defined(USE_ARMV8_CRC32C) -/* Use ARMv8 CRC Extension instructions. */ - +/* + * Use either ARMv8 CRC Extension or CRYPTO Extension (PMULL) instructions. + * We don't need a runtime check for CRC, so for constant inputs, where + * we assume the input is small, we can avoid an indirect function call. 
+ */ #define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) + ((crc) = __builtin_constant_p(len) ? \ + pg_comp_crc32c_armv8((crc), (data), (len)) : \ + pg_comp_crc32c((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pmull(pg_crc32c crc, const void *data, size_t len); +#endif #elif defined(USE_LOONGARCH_CRC32C) /* Use LoongArch CRCC instructions. */ @@ -131,8 +140,8 @@ extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_ #elif defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) /* - * Use ARMv8 instructions, but perform a runtime check first - * to check that they are available. + * Use either ARMv8 CRC Extension or CRYPTO Extension (PMULL) instructions, + * but perform a runtime check first to check that they are available. 
*/ #define COMP_CRC32C(crc, data, len) \ ((crc) = pg_comp_crc32c((crc), (data), (len))) @@ -141,6 +150,9 @@ extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_ extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pmull(pg_crc32c crc, const void *data, size_t len); +#endif #else /* diff --git a/src/port/meson.build b/src/port/meson.build index d55cb0424f3..922b3f64676 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -93,6 +93,7 @@ replace_funcs_pos = [ # arm / aarch64 ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'], ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'], + ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C'], ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/port/pg_crc32c_armv8.c b/src/port/pg_crc32c_armv8.c index 9ca0f728d39..aa0089a431c 100644 --- a/src/port/pg_crc32c_armv8.c +++ b/src/port/pg_crc32c_armv8.c @@ -20,6 +20,10 @@ #include <arm_acle.h> #endif +#ifdef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK +#include <arm_neon.h> +#endif + #include "port/pg_crc32c.h" pg_crc32c @@ -77,3 +81,126 @@ pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len) return crc; } + +#ifdef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK + +/* + * Note: There is no copyright notice in the following generated code. 
+ * + * We have modified the output to + * - match our function declaration + * - match whitespace to our project style + * - be more friendly for pgindent + */ + +/* Generated by https://github.com/corsix/fast-crc32/ using: */ +/* ./generate -i neon -p crc32c -a v4e */ +/* MIT licensed */ + +pg_attribute_target("+crypto") +static inline +uint64x2_t +clmul_lo_e(uint64x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint64x2_t r; + +__asm("pmull %0.1q, %2.1d, %3.1d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r), "+w"(c):"w"(a), "w"(b)); + return r; +} + +pg_attribute_target("+crypto") +static inline +uint64x2_t +clmul_hi_e(uint64x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint64x2_t r; + +__asm("pmull2 %0.1q, %2.2d, %3.2d\neor %0.16b, %0.16b, %1.16b\n":"=w"(r), "+w"(c):"w"(a), "w"(b)); + return r; +} + +pg_attribute_target("+crypto") +pg_crc32c +pg_comp_crc32c_pmull(pg_crc32c crc, const void *data, size_t len) +{ + /* adjust names to match generated code */ + pg_crc32c crc0 = crc; + const char *buf = data; + + /* align to 16 bytes */ + for (; len && ((uintptr_t) buf & 7); --len) + { + crc0 = __crc32cb(crc0, *buf++); + } + if (((uintptr_t) buf & 8) && len >= 8) + { + crc0 = __crc32cd(crc0, *(const uint64_t *) buf); + buf += 8; + len -= 8; + } + + if (len >= 64) + { + const char *end = buf + len; + const char *limit = buf + len - 64; + + /* First vector chunk. */ + uint64x2_t x0 = vld1q_u64((const uint64_t *) buf), + y0; + uint64x2_t x1 = vld1q_u64((const uint64_t *) (buf + 16)), + y1; + uint64x2_t x2 = vld1q_u64((const uint64_t *) (buf + 32)), + y2; + uint64x2_t x3 = vld1q_u64((const uint64_t *) (buf + 48)), + y3; + uint64x2_t k; + + { + static const uint64_t pg_attribute_aligned(16) k_[] = {0x740eef02, 0x9e4addf8}; + + k = vld1q_u64(k_); + } + + /* + * pgindent complained of unmatched parens upstream: + * + * x0 = veorq_u64((uint64x2_t) {crc0, 0}, x0); + */ + x0 = veorq_u64((uint64x2_t) vsetq_lane_u64(crc0, vdupq_n_u64(0), 0), x0); + buf += 64; + + /* Main loop. 
*/ + while (buf <= limit) + { + y0 = clmul_lo_e(x0, k, vld1q_u64((const uint64_t *) buf)), x0 = clmul_hi_e(x0, k, y0); + y1 = clmul_lo_e(x1, k, vld1q_u64((const uint64_t *) (buf + 16))), x1 = clmul_hi_e(x1, k, y1); + y2 = clmul_lo_e(x2, k, vld1q_u64((const uint64_t *) (buf + 32))), x2 = clmul_hi_e(x2, k, y2); + y3 = clmul_lo_e(x3, k, vld1q_u64((const uint64_t *) (buf + 48))), x3 = clmul_hi_e(x3, k, y3); + buf += 64; + } + + /* Reduce x0 ... x3 to just x0. */ + { + static const uint64_t pg_attribute_aligned(16) k_[] = {0xf20c0dfe, 0x493c7d27}; + + k = vld1q_u64(k_); + } + y0 = clmul_lo_e(x0, k, x1), x0 = clmul_hi_e(x0, k, y0); + y2 = clmul_lo_e(x2, k, x3), x2 = clmul_hi_e(x2, k, y2); + { + static const uint64_t pg_attribute_aligned(16) k_[] = {0x3da6d0cb, 0xba4fc28e}; + + k = vld1q_u64(k_); + } + y0 = clmul_lo_e(x0, k, x2), x0 = clmul_hi_e(x0, k, y0); + + /* Reduce 128 bits to 32 bits, and multiply by x^32. */ + crc0 = __crc32cd(0, vgetq_lane_u64(x0, 0)); + crc0 = __crc32cd(crc0, vgetq_lane_u64(x0, 1)); + len = end - buf; + } + + return pg_comp_crc32c_armv8(crc0, buf, len); +} + +#endif diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_crc32c_armv8_choose.c index a1f0e540c6b..72d70aea1e1 100644 --- a/src/port/pg_crc32c_armv8_choose.c +++ b/src/port/pg_crc32c_armv8_choose.c @@ -108,6 +108,27 @@ pg_crc32c_armv8_available(void) #endif } +static inline bool +pg_pmull_available(void) +{ +#if defined(__aarch64__) && defined(HWCAP_PMULL) + +#ifdef HAVE_ELF_AUX_INFO + unsigned long value; + + return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 && + (value & HWCAP_PMULL) != 0; +#elif defined(HAVE_GETAUXVAL) + return (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0; +#else + return false; +#endif + +#else + return false; +#endif +} + /* * This gets called on the first call. It replaces the function pointer * so that subsequent calls are routed directly to the chosen implementation. 
@@ -115,10 +136,23 @@ pg_crc32c_armv8_available(void) static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) { + /* set fallbacks */ +#ifdef USE_ARMV8_CRC32C + /* On e.g. MacOS, our runtime feature detection doesn't work */ + pg_comp_crc32c = pg_comp_crc32c_armv8; +#else + pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif + if (pg_crc32c_armv8_available()) + { pg_comp_crc32c = pg_comp_crc32c_armv8; - else - pg_comp_crc32c = pg_comp_crc32c_sb8; + +#ifdef USE_PMULL_CRC32C_WITH_RUNTIME_CHECK + if (pg_pmull_available()) + pg_comp_crc32c = pg_comp_crc32c_pmull; +#endif + } return pg_comp_crc32c(crc, data, len); } -- 2.53.0
v503-0002-Add-a-Postgres-SQL-function-for-crc32c-benchmar.patch.nocfbot
Description: Binary data
