Hi.
Here's a proposed patch that uses CPUID at startup to determine whether
the SSE4.2 CRC instructions are available and, if so, uses them instead
of the slice-by-8 implementation (posted earlier).
A few notes:
1. GCC has included cpuid.h since 4.3.0, so I figured it was safe to
use. It can be replaced with some inline assembly otherwise.
2. I've also used the crc32b/crc32q instructions directly rather than
using ".byte" directives to encode the instructions; binutils versions
since 2007 or so have supported them.
3. I've included the MSVC implementation mostly as an example of how to
extend this to different compilers/platforms. It's written according
to the documentation for MSVC intrinsics, but I have not tested it.
Suggestions/improvements are welcome.
-- Abhijit
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 73c30c5..ae34876 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -37,6 +37,7 @@
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/ps_status.h"
+#include "utils/pg_crc.h"
const char *progname;
@@ -76,6 +77,12 @@ main(int argc, char *argv[])
argv = save_ps_display_args(argc, argv);
/*
+ * Select the fastest available CRC32 implementation for the
+ * platform.
+ */
+ pg_init_comp_crc32c();
+
+ /*
* If supported on the current platform, set up a handler to be called if
* the backend/postmaster crashes with a fatal signal or exception.
*/
diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h
index 55934e5..c59c05b 100644
--- a/src/include/utils/pg_crc.h
+++ b/src/include/utils/pg_crc.h
@@ -41,7 +41,8 @@
typedef uint32 pg_crc32;
-extern pg_crc32 pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len);
+extern void pg_init_comp_crc32c(void);
+extern pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len);
/*
* CRC calculation using the CRC-32C (Castagnoli) polynomial.
diff --git a/src/port/pg_crc.c b/src/port/pg_crc.c
index 2f9857b..6be17b0 100644
--- a/src/port/pg_crc.c
+++ b/src/port/pg_crc.c
@@ -21,6 +21,13 @@
#include "utils/pg_crc.h"
#include "utils/pg_crc_tables.h"
+#if defined(HAVE_CPUID_H)
+#include <cpuid.h>
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#include <nmmintrin.h>
+#endif
+
static inline uint32 bswap32(uint32 x)
{
#if defined(__GNUC__) || defined(__clang__)
@@ -39,8 +46,8 @@ static inline uint32 bswap32(uint32 x)
#define cpu_to_le32(x) x
#endif
-pg_crc32
-pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
+static pg_crc32
+pg_comp_crc32c_sb8(pg_crc32 crc, const void *data, size_t len)
{
const unsigned char *p = data;
const uint32 *p8;
@@ -61,7 +68,6 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
*/
p8 = (const uint32 *) p;
-
while (len >= 8)
{
uint32 a = *p8++ ^ cpu_to_le32(crc);
@@ -101,8 +107,102 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
*/
p = (const unsigned char *) p8;
- while (len-- > 0)
+ while (len > 0)
+ {
crc = pg_crc32c_table[0][(crc ^ *p++) & 0xFF] ^ (crc >> 8);
+ len--;
+ }
+
+ return crc;
+}
+/*
+ * Fold one byte of "data" into the running value "crc" and return the
+ * result.  With GCC this emits the crc32b instruction through inline
+ * assembly; with MSVC it uses the _mm_crc32_u8 intrinsic.  Any other
+ * compiler is rejected at build time by the #error below.
+ */
+static pg_crc32
+pg_asm_crc32b(pg_crc32 crc, unsigned char data)
+{
+#ifdef __GNUC__
+ __asm__ ("crc32b %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data));
return crc;
+#elif defined(_MSC_VER)
+ return _mm_crc32_u8(crc, data);
+#else
+#error "Don't know how to generate crc32b instruction"
+#endif
}
+
+/*
+ * Fold the eight bytes of "data" (least significant byte first) into the
+ * running value "crc" and return the result.
+ *
+ * The 64-bit form of the instruction (crc32q / _mm_crc32_u64) is only
+ * available in 64-bit mode.  On 32-bit x86 builds we instead feed the
+ * two 32-bit halves through the 32-bit form, low half first, which
+ * yields the same CRC.
+ */
+static pg_crc32
+pg_asm_crc32q(uint64 crc, unsigned long long data)
+{
+#if defined(__GNUC__) && defined(__x86_64__)
+	__asm__ ("crc32q %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data));
+	return crc;
+#elif defined(__GNUC__)
+	uint32		c = (uint32) crc;
+	uint32		lo = (uint32) data;
+	uint32		hi = (uint32) (data >> 32);
+
+	__asm__ ("crc32l %[data], %[crc]\n" : [crc] "+r" (c) : [data] "rm" (lo));
+	__asm__ ("crc32l %[data], %[crc]\n" : [crc] "+r" (c) : [data] "rm" (hi));
+	return c;
+#elif defined(_MSC_VER) && defined(_M_X64)
+	return _mm_crc32_u64(crc, data);
+#elif defined(_MSC_VER)
+	crc = _mm_crc32_u32((uint32) crc, (uint32) data);
+	return _mm_crc32_u32((uint32) crc, (uint32) (data >> 32));
+#else
+#error "Don't know how to generate crc32q instruction"
+#endif
+}
+
+/*
+ * Compute a CRC over the "len" bytes at "data" with the SSE4.2 CRC32
+ * instructions, continuing from the running value "crc".  Returns the
+ * updated running value; no initial or final XOR is applied here.
+ */
+static pg_crc32
+pg_comp_crc32c_sse(pg_crc32 crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const uint64 *p8;
+
+	/*
+	 * Handle initial bytes one at a time if necessary to ensure that
+	 * the loop below starts with a pointer aligned to eight bytes,
+	 * matching the uint64 loads it performs.
+	 */
+
+	while (len > 0 && ((uintptr_t) p & (sizeof(uint64) - 1)))
+	{
+		crc = pg_asm_crc32b(crc, *p++);
+		len--;
+	}
+
+	/*
+	 * Process eight bytes of data at a time.
+	 */
+
+	p8 = (const uint64 *) p;
+	while (len >= 8)
+	{
+		crc = pg_asm_crc32q(crc, *p8++);
+		len -= 8;
+	}
+
+	/*
+	 * Handle any remaining bytes one at a time.
+	 */
+
+	p = (const unsigned char *) p8;
+	while (len > 0)
+	{
+		crc = pg_asm_crc32b(crc, *p++);
+		len--;
+	}
+
+	return crc;
+}
+
+/*
+ * pg_init_comp_crc32c
+ *
+ * Choose the implementation that the pg_comp_crc32c function pointer
+ * refers to.  If (we can tell that) the CPU supports SSE4.2
+ * instructions, we switch to the version that uses the CRC instruction;
+ * otherwise the pointer keeps its initial value, the slice-by-8
+ * software implementation.
+ */
+
+void
+pg_init_comp_crc32c(void)
+{
+	unsigned int ecx = 0;
+
+#if defined(__GNUC__) && defined(HAVE_CPUID_H)
+	unsigned int eax,
+				ebx,
+				edx;
+
+	/* __get_cpuid returns 0 if the requested leaf is unsupported. */
+	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+		ecx = 0;
+#elif defined(_MSC_VER)
+	/* The MSVC intrinsic takes an array of (signed) int. */
+	int			regs[4] = { 0, 0, 0, 0 };
+
+	__cpuid(regs, 1);
+	ecx = (unsigned int) regs[2];
+#endif
+
+	/* CPUID leaf 1 reports SSE4.2 support in bit 20 of ECX. */
+	if (ecx & (1U << 20))
+		pg_comp_crc32c = pg_comp_crc32c_sse;
+}
+
+pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len) = pg_comp_crc32c_sb8;
diff --git a/configure b/configure
index 7594401..284ca6f 100755
--- a/configure
+++ b/configure
@@ -9195,7 +9195,7 @@ fi
done
-for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
+for ac_header in atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
diff --git a/configure.in b/configure.in
index 0dc3f18..8ab888d 100644
--- a/configure.in
+++ b/configure.in
@@ -1023,7 +1023,7 @@ AC_SUBST(UUID_LIBS)
##
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
-AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
+AC_CHECK_HEADERS([atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
# On BSD, test for net/if.h will fail unless sys/socket.h
# is included first.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 465281c..355f5fc 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -99,6 +99,9 @@
/* Define to 1 if you have the `class' function. */
#undef HAVE_CLASS
+/* Define to 1 if you have the <cpuid.h> header file. */
+#undef HAVE_CPUID_H
+
/* Define to 1 if you have the <crtdefs.h> header file. */
#undef HAVE_CRTDEFS_H
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 05941e6..c2fe01f 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -78,6 +78,9 @@
/* Define to 1 if you have the `class' function. */
/* #undef HAVE_CLASS */
+/* Define to 1 if you have the <cpuid.h> header file. */
+/* #undef HAVE_CPUID_H */
+
/* Define to 1 if you have the `crypt' function. */
/* #undef HAVE_CRYPT */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers