This is an offshoot of the "CRC32C Parallel Computation Optimization on ARM" thread [0]. I intend for this to be a prerequisite patch set.
Presently, for the SSE 4.2 and ARMv8 CRC instructions used in the CRC32C code for WAL records, etc., we first check if the intrinsics are available with the default compiler flags. If so, we only bother compiling the implementation that uses those intrinsics. If not, we also check whether the intrinsics are available with some extra CFLAGS, and if they are, we compile both the implementation that uses the intrinsics as well as a fallback implementation that doesn't require any special instructions. Then, at runtime, we check what's available in the hardware and choose the appropriate CRC32C implementation. The aforementioned other thread [0] aims to further optimize this code by using another instruction that requires additional configure and/or runtime checks. $SUBJECT has been in the back of my mind for a while, but given proposals to add further complexity to this code, I figured it might be a good time to propose this simplification. Specifically, I think we shouldn't worry about trying to compile only the special intrinsics versions, and instead always try to build both and choose the appropriate one at runtime. AFAICT the trade-offs aren't too bad. With some simple testing, I see that the runtime check occurs once at startup, so I don't anticipate any noticeable performance impact. I suppose each process might need to do the check in EXEC_BACKEND builds, but even so, I suspect the difference is negligible. I also see that the SSE 4.2 runtime check requires the CPUID instruction, so we wouldn't use the intrinsics for hardware that supports SSE 4.2 but not CPUID. However, I'm not sure such hardware even exists. Wikipedia says that CPUID was introduced in 1993 [1], and meson.build appears to omit the CPUID check when determining which CRC32C implementation to use. Furthermore, meson.build alludes to problems with some of the CPUID-related checks: # XXX: The configure.ac check for __cpuid() is broken, we don't copy that # here. 
To prevent problems due to two detection methods working, stop # checking after one. Are there any other reasons that we should try to avoid the runtime check when possible? I've attached two patches. 0001 adds a debug message to the SSE 4.2 runtime check that matches the one already present for the ARMv8 check. This message just notes whether the runtime check found that the special CRC instructions are available. 0002 is a first attempt at $SUBJECT. I've tested it on both x86 and ARM, and it seems to work as intended. You'll notice that I'm still checking for the intrinsics with the default compiler flags first. I didn't see any strong reason to change this, and doing so allows us to avoid sending extra CFLAGS when possible. Thoughts? [0] https://postgr.es/m/DB9PR08MB6991329A73923BF8ED4B3422F5DBA%40DB9PR08MB6991.eurprd08.prod.outlook.com [1] https://en.wikipedia.org/wiki/CPUID -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 964afc75976cce8d712d97f346d2b8eea9c1f1ee Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Sat, 28 Oct 2023 22:12:45 -0500 Subject: [PATCH v1 1/2] add debug message --- src/port/pg_crc32c_sse42_choose.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 41ff4a35ad..3689c38e92 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -30,10 +30,15 @@ #include "port/pg_crc32c.h" +#ifndef FRONTEND +#include "utils/elog.h" +#endif + static bool pg_crc32c_sse42_available(void) { unsigned int exx[4] = {0, 0, 0, 0}; + bool result; #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); @@ -43,7 +48,13 @@ pg_crc32c_sse42_available(void) #error cpuid instruction not available #endif - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ + result = ((exx[2] & (1 << 20)) != 0); /* SSE 4.2 */ + +#ifndef FRONTEND + elog(DEBUG1, "using sse42 crc32 hardware = %d", result); +#endif + + return result; } /* -- 2.37.1 (Apple Git-137.1)
>From 7b8efae00327728000f7650a513ab0fd4fb15cd5 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Sat, 28 Oct 2023 21:16:19 -0500 Subject: [PATCH v1 2/2] always use runtime checks for sse4.2/armv8 crc32c code --- configure | 122 ++++++++++------------------------- configure.ac | 104 ++++++++++------------------- meson.build | 31 +++------ src/include/pg_config.h.in | 6 -- src/include/port/pg_crc32c.h | 19 +----- src/port/meson.build | 2 - src/tools/msvc/Solution.pm | 2 - 7 files changed, 80 insertions(+), 206 deletions(-) diff --git a/configure b/configure index cfd968235f..47372dcd18 100755 --- a/configure +++ b/configure @@ -17885,28 +17885,6 @@ fi fi -# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all -# define __SSE4_2__ in that case. -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ - -#ifndef __SSE4_2__ -#error __SSE4_2__ not defined -#endif - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - SSE4_2_TARGETED=1 -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - # Check for ARMv8 CRC Extension intrinsics to do CRC calculations. # # First check if __crc32c* intrinsics can be used with the default compiler @@ -18040,50 +18018,36 @@ fi # Select CRC-32C implementation. # -# If we are targeting a processor that has Intel SSE 4.2 instructions, we can -# use the special CRC instructions for calculating CRC-32C. If we're not -# targeting such a processor, but we can nevertheless produce code that uses -# the SSE intrinsics, perhaps with some extra CFLAGS, compile both +# If we are targeting a processor that has Intel SSE 4.2 instructions, or we're +# not targeting such a processor, but we can nevertheless produce code that +# uses the SSE intrinsics, perhaps with some extra CFLAGS, compile both # implementations and select which one to use at runtime, depending on whether # SSE 4.2 is supported by the processor we're running on. 
# # Similarly, if we are targeting an ARM processor that has the CRC -# instructions that are part of the ARMv8 CRC Extension, use them. And if -# we're not targeting such a processor, but can nevertheless produce code that -# uses the CRC instructions, compile both, and select at runtime. -# -# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1 -# in the template or configure command line. +# instructions that are part of the ARMv8 CRC Extension, or if we're not +# targeting such a processor, but can nevertheless produce code that uses the +# CRC instructions, compile both, and select at runtime. # # If we are targeting a LoongArch processor, CRC instructions are # always available (at least on 64 bit), so no runtime check is needed. -if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then - # Use Intel SSE 4.2 if available. - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then - USE_SSE42_CRC32C=1 - else - # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for - # the runtime check. - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then +if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then + # Use Intel SSE 4.2 if available, with runtime check. The CPUID instruction + # is needed for the runtime check. + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 + else + # Use ARM CRC Extension if available, with runtime check. 
+ if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then + USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 else - # Use ARM CRC Extension if available. - if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_CRC" = x""; then - USE_ARMV8_CRC32C=1 + # LoongArch CRCC instructions. + if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then + USE_LOONGARCH_CRC32C=1 else - # ARM CRC Extension, with runtime check? - if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then - USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 - else - # LoongArch CRCC instructions. - if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then - USE_LOONGARCH_CRC32C=1 - else - # fall back to slicing-by-8 algorithm, which doesn't require any - # special CPU support. - USE_SLICING_BY_8_CRC32C=1 - fi - fi + # fall back to slicing-by-8 algorithm, which doesn't require any + # special CPU support. + USE_SLICING_BY_8_CRC32C=1 fi fi fi @@ -18092,54 +18056,36 @@ fi # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. { $as_echo "$as_me:${as_lineno-$LINENO}: checking which CRC-32C implementation to use" >&5 $as_echo_n "checking which CRC-32C implementation to use... 
" >&6; } -if test x"$USE_SSE42_CRC32C" = x"1"; then - -$as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h - - PG_CRC32C_OBJS="pg_crc32c_sse42.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 -$as_echo "SSE 4.2" >&6; } -else - if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then +if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 $as_echo "SSE 4.2 with runtime check" >&6; } - else - if test x"$USE_ARMV8_CRC32C" = x"1"; then - -$as_echo "#define USE_ARMV8_CRC32C 1" >>confdefs.h - - PG_CRC32C_OBJS="pg_crc32c_armv8.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions" >&5 -$as_echo "ARMv8 CRC instructions" >&6; } - else - if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then +else + if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then $as_echo "#define USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5 + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5 $as_echo "ARMv8 CRC instructions with runtime check" >&6; } - else - if test x"$USE_LOONGARCH_CRC32C" = x"1"; then + else + if test x"$USE_LOONGARCH_CRC32C" = x"1"; then $as_echo "#define USE_LOONGARCH_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_loongarch.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: LoongArch CRCC instructions" >&5 + 
PG_CRC32C_OBJS="pg_crc32c_loongarch.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: LoongArch CRCC instructions" >&5 $as_echo "LoongArch CRCC instructions" >&6; } - else + else $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sb8.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 $as_echo "slicing-by-8" >&6; } - fi - fi fi fi fi diff --git a/configure.ac b/configure.ac index f220b379b3..10286e415b 100644 --- a/configure.ac +++ b/configure.ac @@ -2081,14 +2081,6 @@ if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then PGAC_SSE42_CRC32_INTRINSICS([-msse4.2]) fi -# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all -# define __SSE4_2__ in that case. -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [ -#ifndef __SSE4_2__ -#error __SSE4_2__ not defined -#endif -])], [SSE4_2_TARGETED=1]) - # Check for ARMv8 CRC Extension intrinsics to do CRC calculations. # # First check if __crc32c* intrinsics can be used with the default compiler @@ -2109,50 +2101,36 @@ AC_SUBST(CFLAGS_CRC) # Select CRC-32C implementation. # -# If we are targeting a processor that has Intel SSE 4.2 instructions, we can -# use the special CRC instructions for calculating CRC-32C. If we're not -# targeting such a processor, but we can nevertheless produce code that uses -# the SSE intrinsics, perhaps with some extra CFLAGS, compile both +# If we are targeting a processor that has Intel SSE 4.2 instructions, or we're +# not targeting such a processor, but we can nevertheless produce code that +# uses the SSE intrinsics, perhaps with some extra CFLAGS, compile both # implementations and select which one to use at runtime, depending on whether # SSE 4.2 is supported by the processor we're running on. 
# # Similarly, if we are targeting an ARM processor that has the CRC -# instructions that are part of the ARMv8 CRC Extension, use them. And if -# we're not targeting such a processor, but can nevertheless produce code that -# uses the CRC instructions, compile both, and select at runtime. -# -# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1 -# in the template or configure command line. +# instructions that are part of the ARMv8 CRC Extension, or if we're not +# targeting such a processor, but can nevertheless produce code that uses the +# CRC instructions, compile both, and select at runtime. # # If we are targeting a LoongArch processor, CRC instructions are # always available (at least on 64 bit), so no runtime check is needed. -if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then - # Use Intel SSE 4.2 if available. - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then - USE_SSE42_CRC32C=1 - else - # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for - # the runtime check. - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then +if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then + # Use Intel SSE 4.2 if available, with runtime check. The CPUID instruction + # is needed for the runtime check. + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 + else + # Use ARM CRC Extension if available, with runtime check. 
+ if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then + USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 else - # Use ARM CRC Extension if available. - if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_CRC" = x""; then - USE_ARMV8_CRC32C=1 + # LoongArch CRCC instructions. + if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then + USE_LOONGARCH_CRC32C=1 else - # ARM CRC Extension, with runtime check? - if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then - USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 - else - # LoongArch CRCC instructions. - if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then - USE_LOONGARCH_CRC32C=1 - else - # fall back to slicing-by-8 algorithm, which doesn't require any - # special CPU support. - USE_SLICING_BY_8_CRC32C=1 - fi - fi + # fall back to slicing-by-8 algorithm, which doesn't require any + # special CPU support. + USE_SLICING_BY_8_CRC32C=1 fi fi fi @@ -2160,36 +2138,24 @@ fi # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. 
AC_MSG_CHECKING([which CRC-32C implementation to use]) -if test x"$USE_SSE42_CRC32C" = x"1"; then - AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 use Intel SSE 4.2 CRC instructions.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o" - AC_MSG_RESULT(SSE 4.2) +if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" + AC_MSG_RESULT(SSE 4.2 with runtime check) else - if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then - AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" - AC_MSG_RESULT(SSE 4.2 with runtime check) + if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.]) + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" + AC_MSG_RESULT(ARMv8 CRC instructions with runtime check) else - if test x"$USE_ARMV8_CRC32C" = x"1"; then - AC_DEFINE(USE_ARMV8_CRC32C, 1, [Define to 1 to use ARMv8 CRC Extension.]) - PG_CRC32C_OBJS="pg_crc32c_armv8.o" - AC_MSG_RESULT(ARMv8 CRC instructions) + if test x"$USE_LOONGARCH_CRC32C" = x"1"; then + AC_DEFINE(USE_LOONGARCH_CRC32C, 1, [Define to 1 to use LoongArch CRCC instructions.]) + PG_CRC32C_OBJS="pg_crc32c_loongarch.o" + AC_MSG_RESULT(LoongArch CRCC instructions) else - if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then - AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.]) - PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" - AC_MSG_RESULT(ARMv8 CRC instructions with runtime check) - else - if test x"$USE_LOONGARCH_CRC32C" = x"1"; then - 
AC_DEFINE(USE_LOONGARCH_CRC32C, 1, [Define to 1 to use LoongArch CRCC instructions.]) - PG_CRC32C_OBJS="pg_crc32c_loongarch.o" - AC_MSG_RESULT(LoongArch CRCC instructions) - else - AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) - PG_CRC32C_OBJS="pg_crc32c_sb8.o" - AC_MSG_RESULT(slicing-by-8) - fi - fi + AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + AC_MSG_RESULT(slicing-by-8) fi fi fi diff --git a/meson.build b/meson.build index 2d516c8f37..65e00e1538 100644 --- a/meson.build +++ b/meson.build @@ -1983,17 +1983,16 @@ endif ############################################################### # Select CRC-32C implementation. # -# If we are targeting a processor that has Intel SSE 4.2 instructions, we can -# use the special CRC instructions for calculating CRC-32C. If we're not -# targeting such a processor, but we can nevertheless produce code that uses -# the SSE intrinsics, perhaps with some extra CFLAGS, compile both +# If we are targeting a processor that has Intel SSE 4.2 instructions, or we're +# not targeting such a processor, but we can nevertheless produce code that +# uses the SSE intrinsics, perhaps with some extra CFLAGS, compile both # implementations and select which one to use at runtime, depending on whether # SSE 4.2 is supported by the processor we're running on. # # Similarly, if we are targeting an ARM processor that has the CRC -# instructions that are part of the ARMv8 CRC Extension, use them. And if -# we're not targeting such a processor, but can nevertheless produce code that -# uses the CRC instructions, compile both, and select at runtime. +# instructions that are part of the ARMv8 CRC Extension, or if we're not +# targeting such a processor, but can nevertheless produce code that uses the +# CRC instructions, compile both, and select at runtime. 
############################################################### have_optimized_crc = false @@ -2001,7 +2000,6 @@ cflags_crc = [] if host_cpu == 'x86' or host_cpu == 'x86_64' if cc.get_id() == 'msvc' - cdata.set('USE_SSE42_CRC32C', false) cdata.set('USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 1) have_optimized_crc = true else @@ -2020,16 +2018,12 @@ int main(void) ''' if cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32 without -msse4.2', - args: test_c_args) - # Use Intel SSE 4.2 unconditionally. - cdata.set('USE_SSE42_CRC32C', 1) - have_optimized_crc = true - elif cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32 with -msse4.2', + args: test_c_args) or \ + cc.links(prog, name: '_mm_crc32_u8 and _mm_crc32_u32 with -msse4.2', args: test_c_args + ['-msse4.2']) # Use Intel SSE 4.2, with runtime check. The CPUID instruction is needed for # the runtime check. cflags_crc += '-msse4.2' - cdata.set('USE_SSE42_CRC32C', false) cdata.set('USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 1) have_optimized_crc = true endif @@ -2055,15 +2049,10 @@ int main(void) ''' if cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd without -march=armv8-a+crc', - args: test_c_args) - # Use ARM CRC Extension unconditionally - cdata.set('USE_ARMV8_CRC32C', 1) - have_optimized_crc = true - elif cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd with -march=armv8-a+crc', + args: test_c_args) or \ + cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd with -march=armv8-a+crc', args: test_c_args + ['-march=armv8-a+crc']) # Use ARM CRC Extension, with runtime check - cflags_crc += '-march=armv8-a+crc' - cdata.set('USE_ARMV8_CRC32C', false) cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 1) have_optimized_crc = true endif diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index d8a2985567..dd537c3169 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -683,9 +683,6 @@ /* Define to 1 if strerror_r() returns int. 
*/ #undef STRERROR_R_INT -/* Define to 1 to use ARMv8 CRC Extension. */ -#undef USE_ARMV8_CRC32C - /* Define to 1 to use ARMv8 CRC Extension with a runtime check. */ #undef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK @@ -732,9 +729,6 @@ /* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ #undef USE_SLICING_BY_8_CRC32C -/* Define to 1 use Intel SSE 4.2 CRC instructions. */ -#undef USE_SSE42_CRC32C - /* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ #undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index d085f1dc00..cd2ccebfd6 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -41,24 +41,7 @@ typedef uint32 pg_crc32c; #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) -#if defined(USE_SSE42_CRC32C) -/* Use Intel SSE4.2 instructions. */ -#define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) -#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) - -extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); - -#elif defined(USE_ARMV8_CRC32C) -/* Use ARMv8 CRC Extension instructions. */ - -#define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) -#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) - -extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); - -#elif defined(USE_LOONGARCH_CRC32C) +#if defined(USE_LOONGARCH_CRC32C) /* Use LoongArch CRCC instructions. 
*/ #define COMP_CRC32C(crc, data, len) \ diff --git a/src/port/meson.build b/src/port/meson.build index a0d0a9583a..6fc8ac9953 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -81,13 +81,11 @@ endif # is true replace_funcs_pos = [ # x86/x64 - ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 'crc'], ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], # arm / aarch64 - ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'], ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'], ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index a50f730260..496f11bf54 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -421,7 +421,6 @@ sub GenerateFiles SIZEOF_VOID_P => $bits / 8, STDC_HEADERS => 1, STRERROR_R_INT => undef, - USE_ARMV8_CRC32C => undef, USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK => undef, USE_ASSERT_CHECKING => $self->{options}->{asserts} ? 1 : undef, USE_BONJOUR => undef, @@ -437,7 +436,6 @@ sub GenerateFiles USE_OPENSSL => undef, USE_PAM => undef, USE_SLICING_BY_8_CRC32C => undef, - USE_SSE42_CRC32C => undef, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK => 1, USE_SYSTEMD => undef, USE_SYSV_SEMAPHORES => undef, -- 2.37.1 (Apple Git-137.1)