Hi David, On Wed, Feb 26, 2020 at 9:56 PM David Fetter <da...@fetter.org> wrote: > > On Wed, Feb 26, 2020 at 09:12:24AM +0100, David Fetter wrote: > > On Fri, Jan 31, 2020 at 04:59:18PM +0100, David Fetter wrote: > > > On Wed, Jan 15, 2020 at 03:45:12PM -0800, Jesse Zhang wrote: > > > > On Tue, Jan 14, 2020 at 2:09 PM David Fetter <da...@fetter.org> wrote: > > > > > > The changes in hash AM and SIMPLEHASH do look like a net positive > > > > > > improvement. My biggest cringe might be in pg_bitutils: > > > > > > > > > > > > 1. Is ceil_log2_64 dead code? > > > > > > > > > > Let's call it nascent code. I suspect there are places it could go, if > > > > > I look for them. Also, it seemed silly to have one without the other. > > > > > > > > > > > > > While not absolutely required, I'd like us to find at least one > > > > place and start using it. (Clang also nags at me when we have > > > > unused functions). > > > > > > Done in the expanded patches attached. I see that you've found use of it in dynahash, thanks!
The math in the new (from v4 to v6) patch is wrong: it yields ceil_log2(1) = 1 or next_power_of_2(1) = 2. I can see that you lifted the restriction of "num greater than one" for ceil_log2() in this patch set, but it's now _more_ problematic to base those functions on pg_leftmost_one_pos(). I'm not comfortable with your changes to pg_leftmost_one_pos() to remove the restriction on word being non-zero. Specifically pg_leftmost_one_pos() is made to return 0 on 0 input. While none of its current callers (in HEAD) is harmed, this introduces muddy semantics: 1. pg_leftmost_one_pos is semantically undefined on 0 input: scanning for a set bit in a zero word won't find it anywhere. 2. we can _try_ generalizing it to accommodate ceil_log2 by extrapolating based on the invariant that BSR + LZCNT = 31 (or 63). In that case, the extrapolation yields -1 for pg_leftmost_one_pos(0). I'm not convinced that others on the list will be comfortable with the generalization suggested in 2 above. I've quickly put together a PoC patch on top of yours, which re-implements ceil_log2 using LZCNT coupled with a CPUID check. Thoughts? Cheers, Jesse
From 0e4392d77b6132a508b7da14871cc99066a9d114 Mon Sep 17 00:00:00 2001 From: Jesse Zhang <sbje...@gmail.com> Date: Fri, 28 Feb 2020 16:22:04 -0800 Subject: [PATCH] Use LZCOUNT when possible This patch reverts the changes to pg_leftmost_one and friends (which is really bit-scan-reverse, and is legitimately undefined one zero) and reworks ceil_log2 and friends to use a count-leading-zeros operation that is well-defined on zero. --- src/include/port/pg_bitutils.h | 47 ++++++++++++--------- src/port/pg_bitutils.c | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 20 deletions(-) diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 88a9ea5b7fb..b4d5724ee1d 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -20,18 +20,19 @@ extern PGDLLIMPORT const uint8 pg_number_of_ones[256]; /* * pg_leftmost_one_pos32 * Returns the position of the most significant set bit in "word", - * measured from the least significant bit. + * measured from the least significant bit. word must not be 0. */ static inline int pg_leftmost_one_pos32(uint32 word) { #ifdef HAVE__BUILTIN_CLZ - return word ? 31 - __builtin_clz(word) : 0; + Assert(word != 0); + + return 31 - __builtin_clz(word); #else int shift = 32 - 8; - if (word == 0) - return 0; + Assert(word != 0); while ((word >> shift) == 0) shift -= 8; @@ -48,18 +49,19 @@ static inline int pg_leftmost_one_pos64(uint64 word) { #ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + #if defined(HAVE_LONG_INT_64) - return word ? 63 - __builtin_clzl(word) : 0; + return 63 - __builtin_clzl(word); #elif defined(HAVE_LONG_LONG_INT_64) - return word ? 63 - __builtin_clzll(word) : 0; + return 63 - __builtin_clzll(word); #else #error must have a working 64-bit integer datatype #endif #else /* !HAVE__BUILTIN_CLZ */ int shift = 64 - 8; - if (word == 0) - return 0; + Assert(word != 0); while ((word >> shift) == 0) shift -= 8; @@ -71,18 +73,19 @@ pg_leftmost_one_pos64(uint64 word) /* * pg_rightmost_one_pos32 * Returns the position of the least significant set bit in "word", - * measured from the least significant bit. + * measured from the least significant bit. word must not be 0. */ static inline int pg_rightmost_one_pos32(uint32 word) { #ifdef HAVE__BUILTIN_CTZ - return word ? __builtin_ctz(word) : 32; + Assert(word != 0); + + return __builtin_ctz(word); #else int result = 0; - if (word == 0) - return 32; + Assert(word != 0); while ((word & 255) == 0) { @@ -102,18 +105,19 @@ static inline int pg_rightmost_one_pos64(uint64 word) { #ifdef HAVE__BUILTIN_CTZ + Assert(word != 0); + #if defined(HAVE_LONG_INT_64) - return word ? __builtin_ctzl(word) : 64; + return __builtin_ctzl(word); #elif defined(HAVE_LONG_LONG_INT_64) - return word ? __builtin_ctzll(word) : 64; + return __builtin_ctzll(word); #else #error must have a working 64-bit integer datatype #endif #else /* !HAVE__BUILTIN_CTZ */ int result = 0; - if (word == 0) - return 64; + Assert(word != 0); while ((word & 255) == 0) { @@ -141,19 +145,22 @@ pg_rotate_right32(uint32 word, int n) return (word >> n) | (word << (sizeof(word) * BITS_PER_BYTE - n)); } +extern int (*pg_count_leading_zeros_32)(uint32); +extern int (*pg_count_leading_zeros_64)(uint64); + /* ceil(lg2(num)) */ static inline uint32 ceil_log2_32(uint32 num) { Assert(num > 0); - return pg_leftmost_one_pos32(num-1) + 1; + return 8 * sizeof(num) - pg_count_leading_zeros_32(num-1); } static inline uint64 ceil_log2_64(uint64 num) { Assert(num > 0); - return pg_leftmost_one_pos64(num-1) + 1; + return 8 * sizeof(num) - pg_count_leading_zeros_64(num-1); } /* Calculate the first power of 2 >= num */ @@ -161,14 +168,14 @@ static inline uint32 next_power_of_2_32(uint32 num) { Assert(num > 0); - return ((uint32) 1) << (pg_leftmost_one_pos32(num-1) + 1); + return ((uint32) 1) << ceil_log2_32(num); } static inline uint64 next_power_of_2_64(uint64 num) { Assert(num > 0); - return ((uint64) 1) << (pg_leftmost_one_pos64(num-1) + 1); + return ((uint64) 1) << ceil_log2_64(num); } #endif /* PG_BITUTILS_H */ diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 392fbd33845..2732953a466 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -319,3 +319,80 @@ pg_popcount(const char *buf, int bytes) return popcnt; } + +static bool pg_lzcnt_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 0x80000001); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & bit_ABM) != 0; /* ABM / LZCNT */ +} + +static int +__attribute__((target("lzcnt"))) +pg_fast_count_leading_zeros_32(uint32 num) +{ + return __builtin_clz(num); +} + +static int +pg_slow_count_leading_zeros_32(uint32 num) +{ + if (num == 0) + return 8 * sizeof(num); + return __builtin_clz(num); +} + + +static int +__attribute__((target("lzcnt"))) +pg_fast_count_leading_zeros_64(uint64 num) +{ + return __builtin_clzll(num); +} + +static int +pg_slow_count_leading_zeros_64(uint64 num) +{ + if (num == 0) + return 8 * sizeof(num); + return __builtin_clzll(num); +} + +static int pg_count_leading_zeros_32_choose(uint32 num) +{ + if (pg_lzcnt_available()) + { + pg_count_leading_zeros_64 = pg_fast_count_leading_zeros_64; + pg_count_leading_zeros_32 = pg_fast_count_leading_zeros_32; + return pg_count_leading_zeros_32(num); + } + pg_count_leading_zeros_64 = pg_slow_count_leading_zeros_64; + pg_count_leading_zeros_32 = pg_slow_count_leading_zeros_32; + + return pg_slow_count_leading_zeros_32(num); +} + +static int pg_count_leading_zeros_64_choose(uint64 num) +{ + if (pg_lzcnt_available()) + { + pg_count_leading_zeros_64 = pg_fast_count_leading_zeros_64; + pg_count_leading_zeros_32 = pg_fast_count_leading_zeros_32; + return pg_count_leading_zeros_64(num); + } + pg_count_leading_zeros_64 = pg_slow_count_leading_zeros_64; + pg_count_leading_zeros_32 = pg_slow_count_leading_zeros_32; + + return pg_slow_count_leading_zeros_64(num); +} + +int (*pg_count_leading_zeros_32)(uint32) = pg_count_leading_zeros_32_choose; +int (*pg_count_leading_zeros_64)(uint64) = pg_count_leading_zeros_64_choose; -- 2.25.0