From 16693caca491f9d52cff463dfc85bbbd54df9064 Mon Sep 17 00:00:00 2001
From: Paul Amonson <paul.d.amonson@intel.com>
Date: Tue, 23 Jul 2024 11:23:23 -0700
Subject: [PATCH] [Refactor] Move all HW checks to common file.

Signed-off-by: Paul Amonson <paul.d.amonson@intel.com>
---
 configure                            |  12 +-
 configure.ac                         |   2 +-
 src/include/port/pg_bitutils.h       |   1 -
 src/include/port/pg_hw_feat_check.h  |  33 ++++++
 src/port/Makefile                    |   9 +-
 src/port/meson.build                 |   2 +-
 src/port/pg_bitutils.c               |  22 +---
 src/port/pg_crc32c_sse42_choose.c    |  27 +----
 src/port/pg_hw_feat_check.c          | 159 +++++++++++++++++++++++++++
 src/port/pg_popcount_avx512_choose.c | 102 -----------------
 10 files changed, 208 insertions(+), 161 deletions(-)
 create mode 100644 src/include/port/pg_hw_feat_check.h
 create mode 100644 src/port/pg_hw_feat_check.c
 delete mode 100644 src/port/pg_popcount_avx512_choose.c

diff --git a/configure b/configure
index 2abbeb2794..5be6fb4d5f 100755
--- a/configure
+++ b/configure
@@ -14868,7 +14868,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14914,7 +14914,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14938,7 +14938,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14983,7 +14983,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -15007,7 +15007,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -17674,7 +17674,7 @@ fi
 
   fi
   if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
-    PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
+    PG_POPCNT_OBJS="pg_popcount_avx512.o"
 
 $as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
 
diff --git a/configure.ac b/configure.ac
index c46ed2c591..2e64f53898 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2090,7 +2090,7 @@ if test x"$host_cpu" = x"x86_64"; then
     PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512bw])
   fi
   if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
-    PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
+    PG_POPCNT_OBJS="pg_popcount_avx512.o"
     AC_DEFINE(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 popcount instructions with a runtime check.])
   fi
 fi
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 4d88478c9c..263f27930d 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -312,7 +312,6 @@ extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int
  * files.
  */
 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
-extern bool pg_popcount_avx512_available(void);
 extern uint64 pg_popcount_avx512(const char *buf, int bytes);
 extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
 #endif
diff --git a/src/include/port/pg_hw_feat_check.h b/src/include/port/pg_hw_feat_check.h
new file mode 100644
index 0000000000..58be900b54
--- /dev/null
+++ b/src/include/port/pg_hw_feat_check.h
@@ -0,0 +1,33 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_hw_feat_check.h
+ *	  Miscellaneous functions for cheing for hardware features at runtime.
+ *
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * src/include/port/pg_hw_feat_check.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_HW_FEAT_CHECK_H
+#define PG_HW_FEAT_CHECK_H
+
+/*
+ * Test to see if all hardware features required by SSE 4.2 crc32c (64 bit)
+ * are available.
+ */
+extern PGDLLIMPORT bool pg_crc32c_sse42_available(void);
+
+/*
+ * Test to see if all hardware features required by SSE 4.1 POPCNT (64 bit)
+ * are available.
+ */
+extern PGDLLIMPORT bool pg_popcount_available(void);
+
+/*
+ * Test to see if all hardware features required by AVX-512 POPCNT are
+ * available.
+ */
+extern PGDLLIMPORT bool pg_popcount_avx512_available(void);
+#endif							/* PG_HW_FEAT_CHECK_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index db7c02117b..b18710eeef 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -45,6 +45,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_hw_feat_check.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
 	pgmkdirp.o \
@@ -93,10 +94,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
 
-# all versions of pg_popcount_avx512_choose.o need CFLAGS_XSAVE
-pg_popcount_avx512_choose.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_shlib.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_srv.o: CFLAGS+=$(CFLAGS_XSAVE)
+# all versions of pg_hw_feat_check.o need CFLAGS_XSAVE
+pg_hw_feat_check.o:	CFLAGS+=$(CFLAGS_XSAVE)
+pg_hw_feat_check_shlib.o:	CFLAGS+=$(CFLAGS_XSAVE)
+pg_hw_feat_check_srv.o:	CFLAGS+=$(CFLAGS_XSAVE)
 
 # all versions of pg_popcount_avx512.o need CFLAGS_POPCNT
 pg_popcount_avx512.o: CFLAGS+=$(CFLAGS_POPCNT)
diff --git a/src/port/meson.build b/src/port/meson.build
index ff54b7b53e..f8cafc4bd4 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -86,7 +86,7 @@ replace_funcs_pos = [
   ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_popcount_avx512', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'],
-  ['pg_popcount_avx512_choose', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'xsave'],
+  ['pg_hw_feat_check', 'HAVE_XSAVE_INTRINSICS', 'xsave'],
 
   # arm / aarch64
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 87f56e82b8..b2823d5732 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -20,7 +20,7 @@
 #endif
 
 #include "port/pg_bitutils.h"
-
+#include "port/pg_hw_feat_check.h"
 
 /*
  * Array giving the position of the left-most set bit for each possible
@@ -109,7 +109,6 @@ static uint64 pg_popcount_slow(const char *buf, int bytes);
 static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
 
 #ifdef TRY_POPCNT_FAST
-static bool pg_popcount_available(void);
 static int	pg_popcount32_choose(uint32 word);
 static int	pg_popcount64_choose(uint64 word);
 static uint64 pg_popcount_choose(const char *buf, int bytes);
@@ -127,25 +126,6 @@ uint64		(*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask)
 
 #ifdef TRY_POPCNT_FAST
 
-/*
- * Return true if CPUID indicates that the POPCNT instruction is available.
- */
-static bool
-pg_popcount_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
-}
-
 /*
  * These functions get called on the first call to pg_popcount32 etc.
  * They detect whether we can use the asm implementations, and replace
diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c
index 56d600f3a9..36e6949362 100644
--- a/src/port/pg_crc32c_sse42_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -20,31 +20,8 @@
 
 #include "c.h"
 
-#ifdef HAVE__GET_CPUID
-#include <cpuid.h>
-#endif
-
-#ifdef HAVE__CPUID
-#include <intrin.h>
-#endif
-
 #include "port/pg_crc32c.h"
-
-static bool
-pg_crc32c_sse42_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 20)) != 0;	/* SSE 4.2 */
-}
+#include "port/pg_hw_feat_check.h"
 
 /*
  * This gets called on the first call. It replaces the function pointer
@@ -61,4 +38,4 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
 	return pg_comp_crc32c(crc, data, len);
 }
 
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
+pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_hw_feat_check.c b/src/port/pg_hw_feat_check.c
new file mode 100644
index 0000000000..455005add5
--- /dev/null
+++ b/src/port/pg_hw_feat_check.c
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_hw_feat_check.c
+ *		Test for hardware features at runtime on x86_64 platforms.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/port/pg_hw_feat_check.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+#include "port/pg_hw_feat_check.h"
+
+/* Define names for EXX registers to avoid hard to see bugs in code below. */
+typedef unsigned int exx_t;
+typedef enum
+{
+	EAX = 0,
+	EBX = 1,
+	ECX = 2,
+	EDX = 3
+} reg_name;
+
+/*
+ * Helper function.
+ * Test for a bit being set in a exx_t register.
+ */
+inline static bool is_bit_set_in_exx(exx_t* regs, reg_name ex, int bit)
+{
+	return ((regs[ex] & (1 << bit)) != 0);
+}
+
+/*
+ * x86_64 Platform CPUID check for Linux and Visual Studio platforms.
+ */
+inline static void
+pg_getcpuid(unsigned int leaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(leaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * x86_64 Platform CPUIDEX check for Linux and Visual Studio platforms.
+ */
+inline static void
+pg_getcpuidex(unsigned int leaf, unsigned int subleaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(leaf, subleaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * Check for CPU support for CPUID: osxsave
+ */
+inline static bool
+osxsave_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 27); /* osxsave */
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that osxsave_available() returns true
+ * before calling this.
+ */
+inline static bool
+zmm_regs_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	return (_xgetbv(0) & 0xe6) == 0xe6;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+inline static bool
+avx512_popcnt_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuidex(7, 0, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 14) && is_bit_set_in_exx(exx, EBX, 30);
+}
+
+/*
+ * Return true if CPUID indicates that the POPCNT instruction is available.
+ */
+bool PGDLLIMPORT pg_popcount_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 23);
+ }
+
+ /*
+  * Returns true if the CPU supports the instructions required for the AVX-512
+  * pg_popcount() implementation.
+  *
+  * PA: The call to 'osxsave_available' MUST preceed the call to
+  *     'zmm_regs_available' function per NB above.
+  */
+bool PGDLLIMPORT pg_popcount_avx512_available(void)
+{
+	 return osxsave_available() &&
+			zmm_regs_available() &&
+			avx512_popcnt_available();
+}
+
+/*
+ * Does CPUID say there's support for SSE 4.2?
+ */
+bool PGDLLIMPORT pg_crc32c_sse42_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 20);
+}
diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
deleted file mode 100644
index b37107803a..0000000000
--- a/src/port/pg_popcount_avx512_choose.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_popcount_avx512_choose.c
- *    Test whether we can use the AVX-512 pg_popcount() implementation.
- *
- * Copyright (c) 2024, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- *    src/port/pg_popcount_avx512_choose.c
- *
- *-------------------------------------------------------------------------
- */
-#include "c.h"
-
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
-#ifdef HAVE_XSAVE_INTRINSICS
-#include <immintrin.h>
-#endif
-
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
-#include "port/pg_bitutils.h"
-
-/*
- * It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to
- * use AVX-512 intrinsics, but we check it anyway to be sure.  We piggy-back on
- * the function pointers that are only used when TRY_POPCNT_FAST is set.
- */
-#ifdef TRY_POPCNT_FAST
-
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
-	return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
-		(exx[1] & (1 << 30)) != 0;	/* avx512-bw */
-}
-
-/*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
- */
-bool
-pg_popcount_avx512_available(void)
-{
-	return xsave_available() &&
-		zmm_regs_available() &&
-		avx512_popcnt_available();
-}
-
-#endif							/* TRY_POPCNT_FAST */
-- 
2.34.1

