On 02/03/18 06:42, Andres Freund wrote:
On 2018-03-02 11:37:52 +1300, Thomas Munro wrote:
So... that stuff probably needs either a configure check for the
getauxval function and/or those headers, or an OS check?

It'd probably be better to not rely on os specific headers, and instead
directly access the capabilities.

Anyone got an idea on how to do that? I googled around a bit, but couldn't find any examples. All the examples I could find were very Linux-specific, and used getauxval(), except for this in the FreeBSD kernel itself: https://github.com/freebsd/freebsd/blob/master/sys/libkern/crc32.c#L775. I'm no expert on FreeBSD, but that doesn't seem suitable for use in a user program.

In any case, I reworked this patch to follow the example of the existing code more closely. Notable changes:

* Use compiler intrinsics instead of inline assembly.

* If the target architecture has them, use the CRC instructions without a runtime check. You'll get that if you use "CFLAGS=-march=armv8.1-a", for example, as the CRC Extension was made mandatory in ARM v8.1. This should work even on FreeBSD or other non-Linux systems, where getauxval() is not available.

* I removed the loop to handle two uint64's at a time, using the LDP instruction. I couldn't find a compiler intrinsic for that, and it was actually slower, at least on the system I have access to, than a straightforward loop that processes 8 bytes at a time.

* I tested this on Linux, with gcc and clang, on an ARM64 virtual machine that I had available (not an emulator, but a VM on a shared ARM64 server).

- Heikki
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 689bb7f181..d530cf92c0 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -627,3 +627,34 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_SSE42_CRC32_INTRINSICS
+
+
+# PGAC_ARM64CE_CRC32C_INTRINSICS
+# -----------------------
+# Check if the compiler supports the CRC32C instructions of the ARMv8 CRC Extension,
+# using the __crc32cb, __crc32ch, __crc32cw, and __crc32cd intrinsic functions.
+#
+# An optional compiler flag can be passed as argument (e.g. -march=armv8-a+crc). If the
+# intrinsics are supported, sets pgac_arm64ce_crc32c_intrinsics, and CFLAGS_ARM64CE_CRC32C.
+AC_DEFUN([PGAC_ARM64CE_CRC32C_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm64ce_crc32c_intrinsics_$1])])dnl
+AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS $1"
+AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>],
+  [unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C="$1"
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_ARM64CE_CRC32C_INTRINSICS
diff --git a/configure b/configure
index 1242e310b4..9b1389df92 100755
--- a/configure
+++ b/configure
@@ -646,6 +646,7 @@ MSGMERGE
 MSGFMT_FLAGS
 MSGFMT
 PG_CRC32C_OBJS
+CFLAGS_ARM64CE_CRC32C
 CFLAGS_SSE42
 have_win32_dbghelp
 HAVE_IPV6
@@ -15509,28 +15510,175 @@ if ac_fn_c_try_compile "$LINENO"; then :
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
+# Check for ARM64 CRC Extensions intrinsics to do CRC calculations.
+#
+# First check if __crc32c* intrinsics can be used with the default compiler
+# flags. If not, check if adding -march=armv8-a+crc flag helps.
+# CFLAGS_ARM64CE_CRC32C is set if that's required.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=" >&5
+$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=... " >&6; }
+if ${pgac_cv_arm64ce_crc32c_intrinsics_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS "
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_acle.h>
+int
+main ()
+{
+unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm64ce_crc32c_intrinsics_=yes
+else
+  pgac_cv_arm64ce_crc32c_intrinsics_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm64ce_crc32c_intrinsics_" >&5
+$as_echo "$pgac_cv_arm64ce_crc32c_intrinsics_" >&6; }
+if test x"$pgac_cv_arm64ce_crc32c_intrinsics_" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C=""
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+
+if test x"$pgac_arm64ce_crc32c_intrinsics" != x"yes"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc" >&5
+$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc... " >&6; }
+if ${pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_acle.h>
+int
+main ()
+{
+unsigned int crc = 0;
+   crc = __crc32cb(crc, 0);
+   crc = __crc32ch(crc, 0);
+   crc = __crc32cw(crc, 0);
+   crc = __crc32cd(crc, 0);
+   /* return computed value, to prevent the above being optimized away */
+   return crc == 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc=yes
+else
+  pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" >&5
+$as_echo "$pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" >&6; }
+if test x"$pgac_cv_arm64ce_crc32c_intrinsics__march_armv8_apcrc" = x"yes"; then
+  CFLAGS_ARM64CE_CRC32C="-march=armv8-a+crc"
+  pgac_arm64ce_crc32c_intrinsics=yes
+fi
+
+fi
+
+
+# In order to detect at runtime, if the ARM64 CRC Extension is available,
+# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have
+# everything we need for that.
+for ac_func in getauxval
+do :
+  ac_fn_c_check_func "$LINENO" "getauxval" "ac_cv_func_getauxval"
+if test "x$ac_cv_func_getauxval" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_GETAUXVAL 1
+_ACEOF
+
+fi
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+int
+main ()
+{
+
+#ifndef AT_HWCAP
+#error AT_HWCAP not defined
+#endif
+#ifndef HWCAP_CRC32
+#error HWCAP_CRC32 not defined
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  HAVE_HWCAP_CRC32=1
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
 # Select CRC-32C implementation.
 #
-# If we are targeting a processor that has SSE 4.2 instructions, we can use the
-# special CRC instructions for calculating CRC-32C. If we're not targeting such
-# a processor, but we can nevertheless produce code that uses the SSE
-# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
-# select which one to use at runtime, depending on whether SSE 4.2 is supported
-# by the processor we're running on.
+# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
+# use the special CRC instructions for calculating CRC-32C. If we're not
+# targeting such a processor, but we can nevertheless produce code that uses
+# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
+# implementations and select which one to use at runtime, depending on whether
+# SSE 4.2 is supported by the processor we're running on.
+#
+# Similarly, if we are targeting an ARM processor that has CRC instructions
+# that are part of the CRC Extension, use them, and if we're not targeting
+# such a processor but can nevertheless produce code that uses the CRC
+# instructions, compile both, and select at run time.
 #
 # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
-if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
+if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARM64CE_CRC32C" = x"" && test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x""; then
+  # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
     USE_SSE42_CRC32C=1
   else
-    # the CPUID instruction is needed for the runtime check.
+    # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for
+    # the runtime check.
     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
     else
-      # fall back to slicing-by-8 algorithm which doesn't require any special
-      # CPU support.
-      USE_SLICING_BY_8_CRC32C=1
+      # Use ARM64 CRC Extension if available.
+      if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARM64CE_CRC32C" = x""; then
+        USE_ARM64CE_CRC32C=1
+      else
+        # ARM64 CRC Extension, with runtime check? The getauxval() function and
+	# HWCAP_CRC32 are needed for the runtime check.
+        if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then
+          USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK=1
+        else
+          # fall back to slicing-by-8 algorithm which doesn't require any special
+          # CPU support.
+          USE_SLICING_BY_8_CRC32C=1
+	fi
+      fi
     fi
   fi
 fi
@@ -15550,16 +15698,34 @@ else
 
 $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
 
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
 $as_echo "SSE 4.2 with runtime check" >&6; }
   else
+    if test x"$USE_ARM64CE_CRC32C" = x"1"; then
+
+$as_echo "#define USE_ARM64CE_CRC32C 1" >>confdefs.h
+
+      PG_CRC32C_OBJS="pg_crc32c_arm64ce.o"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARM64 CE" >&5
+$as_echo "ARM64 CE" >&6; }
+    else
+      if test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
+
+$as_echo "#define USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+        PG_CRC32C_OBJS="pg_crc32c_arm64ce.o pg_crc32c_sb8.o pg_crc32c_arm64ce_choose.o"
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARM64 CE with runtime check" >&5
+$as_echo "ARM64 CE with runtime check" >&6; }
+      else
 
 $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h
 
-    PG_CRC32C_OBJS="pg_crc32c_sb8.o"
-    { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
+        PG_CRC32C_OBJS="pg_crc32c_sb8.o"
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
 $as_echo "slicing-by-8" >&6; }
+      fi
+    fi
   fi
 fi
 
diff --git a/configure.in b/configure.in
index aee3ab0867..4d2e61b231 100644
--- a/configure.in
+++ b/configure.in
@@ -1901,28 +1901,73 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [
 #endif
 ])], [SSE4_2_TARGETED=1])
 
+# Check for ARM64 CRC Extensions intrinsics to do CRC calculations.
+#
+# First check if __crc32c* intrinsics can be used with the default compiler
+# flags. If not, check if adding -march=armv8-a+crc flag helps.
+# CFLAGS_ARM64CE_CRC32C is set if that's required.
+PGAC_ARM64CE_CRC32C_INTRINSICS([])
+if test x"$pgac_arm64ce_crc32c_intrinsics" != x"yes"; then
+  PGAC_ARM64CE_CRC32C_INTRINSICS([-march=armv8-a+crc])
+fi
+AC_SUBST(CFLAGS_ARM64CE_CRC32C)
+
+# In order to detect at runtime, if the ARM64 CRC Extension is available,
+# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have
+# everything we need for that.
+AC_CHECK_FUNCS([getauxval])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+], [
+#ifndef AT_HWCAP
+#error AT_HWCAP not defined
+#endif
+#ifndef HWCAP_CRC32
+#error HWCAP_CRC32 not defined
+#endif
+])], [HAVE_HWCAP_CRC32=1])
+
 # Select CRC-32C implementation.
 #
-# If we are targeting a processor that has SSE 4.2 instructions, we can use the
-# special CRC instructions for calculating CRC-32C. If we're not targeting such
-# a processor, but we can nevertheless produce code that uses the SSE
-# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
-# select which one to use at runtime, depending on whether SSE 4.2 is supported
-# by the processor we're running on.
+# If we are targeting a processor that has Intel SSE 4.2 instructions, we can
+# use the special CRC instructions for calculating CRC-32C. If we're not
+# targeting such a processor, but we can nevertheless produce code that uses
+# the SSE intrinsics, perhaps with some extra CFLAGS, compile both
+# implementations and select which one to use at runtime, depending on whether
+# SSE 4.2 is supported by the processor we're running on.
+#
+# Similarly, if we are targeting an ARM processor that has CRC instructions
+# that are part of the CRC Extension, use them. And if we're not targeting
+# such a processor, but can nevertheless produce code that uses the CRC
+# instructions, compile both, and select at run time.
 #
 # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
-if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
+if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARM64CE_CRC32C" = x"" && test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x""; then
+  # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
     USE_SSE42_CRC32C=1
   else
-    # the CPUID instruction is needed for the runtime check.
+    # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for
+    # the runtime check.
     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
     else
-      # fall back to slicing-by-8 algorithm which doesn't require any special
-      # CPU support.
-      USE_SLICING_BY_8_CRC32C=1
+      # Use ARM64 CRC Extension if available.
+      if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARM64CE_CRC32C" = x""; then
+        USE_ARM64CE_CRC32C=1
+      else
+        # ARM64 CRC Extension, with runtime check? The getauxval() function and
+	# HWCAP_CRC32 are needed for the runtime check.
+        if test x"$pgac_arm64ce_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then
+          USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK=1
+        else
+          # fall back to slicing-by-8 algorithm which doesn't require any special
+          # CPU support.
+          USE_SLICING_BY_8_CRC32C=1
+	fi
+      fi
     fi
   fi
 fi
@@ -1936,12 +1981,24 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then
 else
   if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
     AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check.])
-    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
+    PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o"
     AC_MSG_RESULT(SSE 4.2 with runtime check)
   else
-    AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
-    PG_CRC32C_OBJS="pg_crc32c_sb8.o"
-    AC_MSG_RESULT(slicing-by-8)
+    if test x"$USE_ARM64CE_CRC32C" = x"1"; then
+      AC_DEFINE(USE_ARM64CE_CRC32C, 1, [Define to 1 to use ARM64 CE CRC instructions.])
+      PG_CRC32C_OBJS="pg_crc32c_arm64ce.o"
+      AC_MSG_RESULT(ARM64 CE)
+    else
+      if test x"$USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
+        AC_DEFINE(USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARM64 CE CRC instructions with a runtime check.])
+        PG_CRC32C_OBJS="pg_crc32c_arm64ce.o pg_crc32c_sb8.o pg_crc32c_arm64ce_choose.o"
+        AC_MSG_RESULT(ARM64 CE with runtime check)
+      else
+        AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
+        PG_CRC32C_OBJS="pg_crc32c_sb8.o"
+        AC_MSG_RESULT(slicing-by-8)
+      fi
+    fi
   fi
 fi
 AC_SUBST(PG_CRC32C_OBJS)
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index dcb8dc5d90..1044642bcc 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -250,6 +250,7 @@ SUN_STUDIO_CC = @SUN_STUDIO_CC@
 CFLAGS = @CFLAGS@
 CFLAGS_VECTOR = @CFLAGS_VECTOR@
 CFLAGS_SSE42 = @CFLAGS_SSE42@
+CFLAGS_ARM64CE_CRC32C = @CFLAGS_ARM64CE_CRC32C@
 
 # Kind-of compilers
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index f98f773ff0..a683771535 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -814,6 +814,12 @@
 /* Define to 1 if your <sys/time.h> declares `struct tm'. */
 #undef TM_IN_SYS_TIME
 
+/* Define to 1 to use ARM64 CE CRC instructions. */
+#undef USE_ARM64CE_CRC32C
+
+/* Define to 1 to use ARM64 CE CRC instructions with a runtime check. */
+#undef USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK
+
 /* Define to 1 to build with assertion checks. (--enable-cassert) */
 #undef USE_ASSERT_CHECKING
 
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index ae2701e958..f6f69fba68 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -42,26 +42,42 @@ typedef uint32 pg_crc32c;
 #define EQ_CRC32C(c1, c2) ((c1) == (c2))
 
 #if defined(USE_SSE42_CRC32C)
-/* Use SSE4.2 instructions. */
+/* Use Intel SSE4.2 instructions. */
 #define COMP_CRC32C(crc, data, len) \
 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
 extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
-#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
+#elif defined(USE_ARM64CE_CRC32C)
+/* Use ARM64 CRC Extensions instructions. */
+
+#define COMP_CRC32C(crc, data, len)							\
+	((crc) = pg_comp_crc32c_arm64((crc), (data), (len)))
+#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
+
+extern pg_crc32c pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len);
+
+#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK)
+
 /*
- * Use SSE4.2 instructions, but perform a runtime check first to check that
- * they are available.
+ * Use Intel SSE 4.2 or ARM64 instructions, but perform a runtime check first
+ * to check that they are available.
  */
 #define COMP_CRC32C(crc, data, len) \
 	((crc) = pg_comp_crc32c((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
 
+#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
+extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
+#endif
+#ifdef USE_ARM64CE_CRC32C_WITH_RUNTIME_CHECK
+extern pg_crc32c pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len);
+#endif
+
 #else
 /*
  * Use slicing-by-8 algorithm.
diff --git a/src/port/Makefile b/src/port/Makefile
index 81f01b25bb..519b8b1a11 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -65,6 +65,10 @@ thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
 pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
 pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
 
+# pg_crc32c_arm64ce.o and its _srv.o version need CFLAGS_ARM64CE_CRC32C
+pg_crc32c_arm64ce.o: CFLAGS+=$(CFLAGS_ARM64CE_CRC32C)
+pg_crc32c_arm64ce_srv.o: CFLAGS+=$(CFLAGS_ARM64CE_CRC32C)
+
 #
 # Server versions of object files
 #
diff --git a/src/port/pg_crc32c_arm64ce.c b/src/port/pg_crc32c_arm64ce.c
new file mode 100644
index 0000000000..bfbeef6dfb
--- /dev/null
+++ b/src/port/pg_crc32c_arm64ce.c
@@ -0,0 +1,84 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_arm64ce.c
+ *	  Compute CRC-32C checksum using ARM64 CRC Extension instructions
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_arm64ce.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "port/pg_crc32c.h"
+
+#include <arm_acle.h>
+
+/*
+ * Update the running CRC-32C value 'crc' with the 'len' bytes at 'data',
+ * using the __crc32c* intrinsics, and return the new value.
+ *
+ * The multi-byte intrinsics are fed through pointer casts, so leading bytes
+ * are consumed first until the pointer is eight-byte aligned.  That keeps
+ * every wider load naturally aligned, whatever alignment the caller's
+ * buffer has.  The bytes are still consumed strictly in address order, just
+ * as they are in the tail handling at the end.
+ *
+ * Remaining lengths are computed as 'pend - p' rather than by comparing
+ * 'p + N' against 'pend', so that no pointer past 'pend' is ever formed,
+ * not even for a short buffer.
+ */
+pg_crc32c
+pg_comp_crc32c_arm64(pg_crc32c crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const unsigned char *pend = p + len;
+
+	/* Align p to eight bytes: at most one byte, one halfword and one word. */
+	if (((uintptr_t) p & 1) != 0 && pend - p >= 1)
+	{
+		crc = __crc32cb(crc, *p);
+		p += 1;
+	}
+	if (((uintptr_t) p & 2) != 0 && pend - p >= 2)
+	{
+		crc = __crc32ch(crc, *(const uint16 *) p);
+		p += 2;
+	}
+	if (((uintptr_t) p & 4) != 0 && pend - p >= 4)
+	{
+		crc = __crc32cw(crc, *(const uint32 *) p);
+		p += 4;
+	}
+
+	/* Process eight bytes at a time, for as long as that much is left. */
+	while (pend - p >= 8)
+	{
+		crc = __crc32cd(crc, *(const uint64 *) p);
+		p += 8;
+	}
+
+	/* Process the remaining 0-7 bytes: a word, a halfword, then a byte. */
+	if (pend - p >= 4)
+	{
+		crc = __crc32cw(crc, *(const uint32 *) p);
+		p += 4;
+	}
+
+	if (pend - p >= 2)
+	{
+		crc = __crc32ch(crc, *(const uint16 *) p);
+		p += 2;
+	}
+
+	if (p < pend)
+	{
+		crc = __crc32cb(crc, *p);
+	}
+
+	return crc;
+}
diff --git a/src/port/pg_crc32c_arm64ce_choose.c b/src/port/pg_crc32c_arm64ce_choose.c
new file mode 100644
index 0000000000..0d45ca726a
--- /dev/null
+++ b/src/port/pg_crc32c_arm64ce_choose.c
@@ -0,0 +1,50 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_arm64ce_choose.c
+ *	  Choose which CRC-32C implementation to use, at runtime.
+ *
+ * Use the special CRC instructions introduced in ARMv8 CRC Extension, if
+ * available on the platform we're running on, but fall back to the
+ * slicing-by-8 implementation otherwise.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_arm64ce_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#include "port/pg_crc32c.h"
+
+static bool
+pg_crc32c_arm64ce_available(void)
+{
+	if ((getauxval(AT_HWCAP) & HWCAP_CRC32) == 0)
+		return false;			/* HWCAP_CRC32 bit is clear in AT_HWCAP */
+	return true;
+}
+
+/*
+ * Initial target of the pg_comp_crc32c function pointer.  It picks an
+ * implementation, stores it in the pointer so that later calls go straight
+ * to it, and then uses it to compute the CRC that was asked for.
+ */
+static pg_crc32c
+pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+{
+	bool		use_crc_insns = pg_crc32c_arm64ce_available();
+
+	pg_comp_crc32c = use_crc_insns ? pg_comp_crc32c_arm64 : pg_comp_crc32c_sb8;
+
+	return pg_comp_crc32c(crc, data, len);
+}
+
+pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_sse42_choose.c
similarity index 87%
rename from src/port/pg_crc32c_choose.c
rename to src/port/pg_crc32c_sse42_choose.c
index 40bee67b0a..cde38d8dbf 100644
--- a/src/port/pg_crc32c_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
- * pg_crc32c_choose.c
+ * pg_crc32c_sse42_choose.c
  *	  Choose which CRC-32C implementation to use, at runtime.
  *
- * Try to the special CRC instructions introduced in Intel SSE 4.2,
- * if available on the platform we're running on, but fall back to the
+ * Use the special CRC instructions introduced in Intel SSE 4.2, if
+ * available on the platform we're running on, but fall back to the
  * slicing-by-8 implementation otherwise.
  *
  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  src/port/pg_crc32c_choose.c
+ *	  src/port/pg_crc32c_sse42_choose.c
  *
  *-------------------------------------------------------------------------
  */

Reply via email to