On Wed, Feb 25, 2026 at 2:59 AM Tom Lane <[email protected]> wrote: > It appears that if you want to build pg_cpu_x86.o unconditionally, > you need to make it more proof against the cases it wasn't getting > built in before.
Thanks, I must have stopped watching the buildfarm too early. I've pushed a fix which will get undone as part of v6-0002. On Wed, Feb 25, 2026 at 2:57 AM Zsolt Parragi <[email protected]> wrote: > > 2 and 3 looks good too, I only found two more typos: > > > + return pg_comp_crc32c(crc, data, len); > +}; > > That semicolon is not needed > > > And in the commit message: > > "it has been intialized and if" > > That should be initialized Also fixed, thanks. -- John Naylor Amazon Web Services
From 7de238c56593850e05351618ea730c4668773cc0 Mon Sep 17 00:00:00 2001 From: John Naylor <[email protected]> Date: Wed, 25 Feb 2026 08:03:45 +0700 Subject: [PATCH v6 2/3] Centralize detection of x86 CPU features We now maintain an array of booleans that indicate which features were detected at runtime. When code wants to check for a given feature, the array is automatically checked if it has been initialized and if not, a single function checks all features at once. Move all x86 feature detection to pg_cpu_x86.c, and move the CRC function choosing logic to the file where the hardware-specific functions are defined, consistent with more recent hardware-specific files in src/port. Reviewed-by: Zsolt Parragi <[email protected]> Discussion: https://postgr.es/m/CANWCAZbgEUFw7LuYSVeJ=tj98r5hoob1ffeqk3alvbw5ru5...@mail.gmail.com --- src/include/port/pg_cpu.h | 50 ++++++++++++++++++ src/port/pg_cpu_x86.c | 65 +++++++++-------------- src/port/pg_crc32c_sse42.c | 32 +++++++++++ src/port/pg_popcount_x86.c | 91 ++------------------------------ src/tools/pgindent/typedefs.list | 1 + 5 files changed, 112 insertions(+), 127 deletions(-) create mode 100644 src/include/port/pg_cpu.h diff --git a/src/include/port/pg_cpu.h b/src/include/port/pg_cpu.h new file mode 100644 index 00000000000..b93b828d3ac --- /dev/null +++ b/src/include/port/pg_cpu.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * pg_cpu.h + * Runtime CPU feature detection + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_cpu.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_CPU_H +#define PG_CPU_H + +#if defined(USE_SSE2) || defined(__i386__) + +typedef enum X86FeatureId +{ + /* Have we run feature detection? 
*/ + INIT_PG_X86, + + /* scalar registers and 128-bit XMM registers */ + PG_SSE4_2, + PG_POPCNT, + + /* 512-bit ZMM registers */ + PG_AVX512_BW, + PG_AVX512_VL, + PG_AVX512_VPCLMULQDQ, + PG_AVX512_VPOPCNTDQ, +} X86FeatureId; +#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1) + +extern PGDLLIMPORT bool X86Features[]; + +extern void set_x86_features(void); + +static inline bool +x86_feature_available(X86FeatureId feature) +{ + if (X86Features[INIT_PG_X86] == false) + set_x86_features(); + + return X86Features[feature]; +} + +#endif /* defined(USE_SSE2) || defined(__i386__) */ + +#endif /* PG_CPU_H */ diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c index 0c292c0223a..88863f9762c 100644 --- a/src/port/pg_cpu_x86.c +++ b/src/port/pg_cpu_x86.c @@ -1,12 +1,7 @@ /*------------------------------------------------------------------------- * * pg_cpu_x86.c - * Choose between Intel SSE 4.2 and software CRC-32C implementation. - * - * On first call, checks if the CPU we're running on supports Intel SSE - * 4.2. If it does, use the special SSE instructions for CRC-32C - * computation. Otherwise, fall back to the pure software implementation - * (slicing-by-8). + * Runtime CPU feature detection for x86 * * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -34,9 +29,11 @@ #include <immintrin.h> #endif -#include "port/pg_crc32c.h" +#include "port/pg_cpu.h" + -#ifndef USE_SLICING_BY_8_CRC32C +/* array indexed by enum X86FeatureId */ +bool X86Features[X86FeaturesSize] = {0}; /* * Does XGETBV say the ZMM registers are enabled? @@ -58,22 +55,13 @@ zmm_regs_available(void) } /* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. + * Parse the CPU ID info for runtime checks. 
*/ -static pg_crc32c -pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) +void +set_x86_features(void) { unsigned int exx[4] = {0, 0, 0, 0}; - /* - * Set fallback. We must guard since slicing-by-8 is not visible - * everywhere. - */ -#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK - pg_comp_crc32c = pg_comp_crc32c_sb8; -#endif - #if defined(HAVE__GET_CPUID) __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUID) @@ -82,36 +70,33 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) #error cpuid instruction not available #endif - if ((exx[2] & (1 << 20)) != 0) /* SSE 4.2 */ - { - pg_comp_crc32c = pg_comp_crc32c_sse42; + X86Features[PG_SSE4_2] = exx[2] >> 20 & 1; + X86Features[PG_POPCNT] = exx[2] >> 23 & 1; - if (exx[2] & (1 << 27) && /* OSXSAVE */ - zmm_regs_available()) - { - /* second cpuid call on leaf 7 to check extended AVX-512 support */ + /* All these features depend on OSXSAVE */ + if (exx[2] & (1 << 27)) + { + /* second cpuid call on leaf 7 to check extended AVX-512 support */ - memset(exx, 0, 4 * sizeof(exx[0])); + memset(exx, 0, 4 * sizeof(exx[0])); #if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); + __cpuidex(exx, 7, 0); #endif -#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK - if (exx[2] & (1 << 10) && /* VPCLMULQDQ */ - exx[1] & (1 << 31)) /* AVX512-VL */ - pg_comp_crc32c = pg_comp_crc32c_avx512; -#endif + if (zmm_regs_available()) + { + X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1; + X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1; + + X86Features[PG_AVX512_VPCLMULQDQ] = exx[2] >> 10 & 1; + X86Features[PG_AVX512_VPOPCNTDQ] = exx[2] >> 14 & 1; } } - return pg_comp_crc32c(crc, data, len); + X86Features[INIT_PG_X86] = true; } -pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; - -#endif - #endif /* 
defined(USE_SSE2) || defined(__i386__) */ diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index c1279d31fbd..b8e77faf4d9 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -19,8 +19,11 @@ #include <immintrin.h> #endif +#include "port/pg_cpu.h" #include "port/pg_crc32c.h" +static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len); + pg_attribute_no_sanitize_alignment() pg_attribute_target("sse4.2") pg_crc32c @@ -158,4 +161,33 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len) return pg_comp_crc32c_sse42(crc0, buf, len); } +#endif /* USE_AVX512_CRC32C_WITH_RUNTIME_CHECK */ + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static pg_crc32c +pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) +{ + /* + * Set fallback. We must guard since slicing-by-8 is not visible + * everywhere. 
+ */ +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK + pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif + + if (x86_feature_available(PG_SSE4_2)) + pg_comp_crc32c = pg_comp_crc32c_sse42; + +#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK + if (x86_feature_available(PG_AVX512_VL) && + x86_feature_available(PG_AVX512_VPCLMULQDQ)) + pg_comp_crc32c = pg_comp_crc32c_avx512; #endif + + return pg_comp_crc32c(crc, data, len); +} + +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c index 6bce089432f..a99613f1818 100644 --- a/src/port/pg_popcount_x86.c +++ b/src/port/pg_popcount_x86.c @@ -14,19 +14,12 @@ #ifdef HAVE_X86_64_POPCNTQ -#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) -#include <cpuid.h> -#endif - #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK #include <immintrin.h> #endif -#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) -#include <intrin.h> -#endif - #include "port/pg_bitutils.h" +#include "port/pg_cpu.h" /* * The SSE4.2 versions are built regardless of whether we are building the @@ -58,84 +51,9 @@ static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask); uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose; uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose; -/* - * Return true if CPUID indicates that the POPCNT instruction is available. - */ -static bool -pg_popcount_sse42_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK -/* - * Does CPUID say there's support for XSAVE instructions? 
- */ -static inline bool -xsave_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 27)) != 0; /* osxsave */ -} - -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that xsave_available() returns true - * before calling this. - */ -#ifdef HAVE_XSAVE_INTRINSICS -pg_attribute_target("xsave") -#endif -static inline bool -zmm_regs_available(void) -{ -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif -} - -/* - * Does CPUID say there's support for AVX-512 popcount and byte-and-word - * instructions? - */ -static inline bool -avx512_popcnt_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ - (exx[1] & (1 << 30)) != 0; /* avx512-bw */ -} - /* * Returns true if the CPU supports the instructions required for the AVX-512 * pg_popcount() implementation. 
@@ -143,9 +61,8 @@ avx512_popcnt_available(void) static bool pg_popcount_avx512_available(void) { - return xsave_available() && - zmm_regs_available() && - avx512_popcnt_available(); + return x86_feature_available(PG_AVX512_BW) && + x86_feature_available(PG_AVX512_VPOPCNTDQ); } #endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */ @@ -159,7 +76,7 @@ pg_popcount_avx512_available(void) static inline void choose_popcount_functions(void) { - if (pg_popcount_sse42_available()) + if (x86_feature_available(PG_POPCNT)) { pg_popcount_optimized = pg_popcount_sse42; pg_popcount_masked_optimized = pg_popcount_masked_sse42; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 241945734ec..041b99976c6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3394,6 +3394,7 @@ X509_NAME X509_NAME_ENTRY X509_STORE X509_STORE_CTX +X86FeatureId XLTW_Oper XLogCtlData XLogCtlInsert -- 2.53.0
From 2860144f2baed43c32221fec328d27fbe1a01e25 Mon Sep 17 00:00:00 2001 From: John Naylor <[email protected]> Date: Mon, 23 Feb 2026 21:17:49 +0700 Subject: [PATCH v6 3/3] Refactor detection of x86 ZMM registers - Call _xgetbv within set_x86_features rather than in a separate function - Use symbols for XCR mask bits rather than a magic constant A future commit will build on this to detect YMM registers without code duplication. Reviewed-by: Zsolt Parragi <[email protected]> Discussion: https://postgr.es/m/CANWCAZbgEUFw7LuYSVeJ=tj98r5hoob1ffeqk3alvbw5ru5...@mail.gmail.com --- src/port/pg_cpu_x86.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c index 88863f9762c..b0e0048f561 100644 --- a/src/port/pg_cpu_x86.c +++ b/src/port/pg_cpu_x86.c @@ -31,32 +31,29 @@ #include "port/pg_cpu.h" +/* XSAVE state component bits that we need */ +#define XMM (1<<1) +#define YMM (1<<2) +#define OPMASK (1<<5) +#define ZMM0_15 (1<<6) +#define ZMM16_31 (1<<7) + /* array indexed by enum X86FeatureId */ bool X86Features[X86FeaturesSize] = {0}; -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that osxsave is available - * before calling this. - */ -#ifdef HAVE_XSAVE_INTRINSICS -pg_attribute_target("xsave") -#endif static bool -zmm_regs_available(void) +mask_available(uint32 value, uint32 mask) { -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif + return (value & mask) == mask; } /* * Parse the CPU ID info for runtime checks. 
*/ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif void set_x86_features(void) { @@ -76,17 +73,24 @@ set_x86_features(void) /* All these features depend on OSXSAVE */ if (exx[2] & (1 << 27)) { - /* second cpuid call on leaf 7 to check extended AVX-512 support */ + uint32 xcr0_val = 0; + /* second cpuid call on leaf 7 to check extended AVX-512 support */ memset(exx, 0, 4 * sizeof(exx[0])); - #if defined(HAVE__GET_CPUID_COUNT) __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUIDEX) __cpuidex(exx, 7, 0); #endif - if (zmm_regs_available()) +#ifdef HAVE_XSAVE_INTRINSICS + /* get value of Extended Control Register */ + xcr0_val = _xgetbv(0); +#endif + + /* Are ZMM registers enabled? */ + if (mask_available(xcr0_val, XMM | YMM | + OPMASK | ZMM0_15 | ZMM16_31)) { X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1; X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1; -- 2.53.0
