Hi David,

On Wed, Feb 26, 2020 at 9:56 PM David Fetter <da...@fetter.org> wrote:
>
> On Wed, Feb 26, 2020 at 09:12:24AM +0100, David Fetter wrote:
> > On Fri, Jan 31, 2020 at 04:59:18PM +0100, David Fetter wrote:
> > > On Wed, Jan 15, 2020 at 03:45:12PM -0800, Jesse Zhang wrote:
> > > > On Tue, Jan 14, 2020 at 2:09 PM David Fetter <da...@fetter.org> wrote:
> > > > > > The changes in hash AM and SIMPLEHASH do look like a net positive
> > > > > > improvement. My biggest cringe might be in pg_bitutils:
> > > > > >
> > > > > > 1. Is ceil_log2_64 dead code?
> > > > >
> > > > > Let's call it nascent code. I suspect there are places it could go, if
> > > > > I look for them.  Also, it seemed silly to have one without the other.
> > > > >
> > > >
> > > > While not absolutely required, I'd like us to find at least one
> > > > place and start using it. (Clang also nags at me when we have
> > > > unused functions).
> > >
> > > Done in the expanded patches attached.
I see that you've found use of it in dynahash, thanks!

The math in the new (from v4 to v6) patch is wrong: it yields
ceil_log2(1) = 1 or next_power_of_2(1) = 2. I can see that you lifted
the restriction of "num greater than one" for ceil_log2() in this patch
set, but it's now _more_ problematic to base those functions on
pg_leftmost_one_pos().

I'm not comfortable with your changes to pg_leftmost_one_pos() to remove
the restriction on word being non-zero. Specifically
pg_leftmost_one_pos() is made to return 0 on 0 input. While none of its
current callers (in HEAD) is harmed, this introduces muddy semantics:

1. pg_leftmost_one_pos is semantically undefined on 0 input: scanning
for a set bit in a zero word won't find it anywhere.

2. we can _try_ generalizing it to accommodate ceil_log2 by
extrapolating based on the invariant that BSR + LZCNT = 31 (or 63). In
that case, the extrapolation yields -1 for pg_leftmost_one_pos(0).

I'm not convinced that others on the list will be comfortable with the
generalization suggested in 2 above.

I've quickly put together a PoC patch on top of yours, which
re-implements ceil_log2 using LZCNT coupled with a CPUID check.
Thoughts?

Cheers,
Jesse
From 0e4392d77b6132a508b7da14871cc99066a9d114 Mon Sep 17 00:00:00 2001
From: Jesse Zhang <sbje...@gmail.com>
Date: Fri, 28 Feb 2020 16:22:04 -0800
Subject: [PATCH] Use LZCOUNT when possible

This patch reverts the changes to pg_leftmost_one and friends (which is
really bit-scan-reverse, and is legitimately undefined one zero) and
reworks ceil_log2 and friends to use a count-leading-zeros operation
that is well-defined on zero.
---
 src/include/port/pg_bitutils.h | 47 ++++++++++++---------
 src/port/pg_bitutils.c         | 77 ++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 20 deletions(-)

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 88a9ea5b7fb..b4d5724ee1d 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -20,18 +20,19 @@ extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
 /*
  * pg_leftmost_one_pos32
  *		Returns the position of the most significant set bit in "word",
- *		measured from the least significant bit.
+ *		measured from the least significant bit.  word must not be 0.
  */
 static inline int
 pg_leftmost_one_pos32(uint32 word)
 {
 #ifdef HAVE__BUILTIN_CLZ
-	return word ? 31 - __builtin_clz(word) : 0;
+	Assert(word != 0);
+
+	return 31 - __builtin_clz(word);
 #else
 	int			shift = 32 - 8;
 
-	if (word == 0)
-		return 0;
+	Assert(word != 0);
 
 	while ((word >> shift) == 0)
 		shift -= 8;
@@ -48,18 +49,19 @@ static inline int
 pg_leftmost_one_pos64(uint64 word)
 {
 #ifdef HAVE__BUILTIN_CLZ
+	Assert(word != 0);
+
 #if defined(HAVE_LONG_INT_64)
-	return word ? 63 - __builtin_clzl(word) : 0;
+	return 63 - __builtin_clzl(word);
 #elif defined(HAVE_LONG_LONG_INT_64)
-	return word ? 63 - __builtin_clzll(word) : 0;
+	return 63 - __builtin_clzll(word);
 #else
 #error must have a working 64-bit integer datatype
 #endif
 #else							/* !HAVE__BUILTIN_CLZ */
 	int			shift = 64 - 8;
 
-	if (word == 0)
-		return 0;
+	Assert(word != 0);
 
 	while ((word >> shift) == 0)
 		shift -= 8;
@@ -71,18 +73,19 @@ pg_leftmost_one_pos64(uint64 word)
 /*
  * pg_rightmost_one_pos32
  *		Returns the position of the least significant set bit in "word",
- *		measured from the least significant bit.
+ *		measured from the least significant bit.  word must not be 0.
  */
 static inline int
 pg_rightmost_one_pos32(uint32 word)
 {
 #ifdef HAVE__BUILTIN_CTZ
-	return word ? __builtin_ctz(word) : 32;
+	Assert(word != 0);
+
+	return __builtin_ctz(word);
 #else
 	int			result = 0;
 
-	if (word == 0)
-		return 32;
+	Assert(word != 0);
 
 	while ((word & 255) == 0)
 	{
@@ -102,18 +105,19 @@ static inline int
 pg_rightmost_one_pos64(uint64 word)
 {
 #ifdef HAVE__BUILTIN_CTZ
+	Assert(word != 0);
+
 #if defined(HAVE_LONG_INT_64)
-	return word ? __builtin_ctzl(word) : 64;
+	return __builtin_ctzl(word);
 #elif defined(HAVE_LONG_LONG_INT_64)
-	return word ? __builtin_ctzll(word) : 64;
+	return __builtin_ctzll(word);
 #else
 #error must have a working 64-bit integer datatype
 #endif
 #else							/* !HAVE__BUILTIN_CTZ */
 	int			result = 0;
 
-	if (word == 0)
-		return 64;
+	Assert(word != 0);
 
 	while ((word & 255) == 0)
 	{
@@ -141,19 +145,22 @@ pg_rotate_right32(uint32 word, int n)
 	return (word >> n) | (word << (sizeof(word) * BITS_PER_BYTE - n));
 }
 
+extern int (*pg_count_leading_zeros_32)(uint32);
+extern int (*pg_count_leading_zeros_64)(uint64);
+
 /* ceil(lg2(num)) */
 static inline uint32
 ceil_log2_32(uint32 num)
 {
 	Assert(num > 0);
-	return pg_leftmost_one_pos32(num-1) + 1;
+	return 8 * sizeof(num) - pg_count_leading_zeros_32(num-1);
 }
 
 static inline uint64
 ceil_log2_64(uint64 num)
 {
 	Assert(num > 0);
-	return pg_leftmost_one_pos64(num-1) + 1;
+	return 8 * sizeof(num) - pg_count_leading_zeros_64(num-1);
 }
 
 /* Calculate the first power of 2 >= num */
@@ -161,14 +168,14 @@ static inline uint32
 next_power_of_2_32(uint32 num)
 {
 	Assert(num > 0);
-	return ((uint32) 1) << (pg_leftmost_one_pos32(num-1) + 1);
+	return ((uint32) 1) << ceil_log2_32(num);
 }
 
 static inline uint64
 next_power_of_2_64(uint64 num)
 {
 	Assert(num > 0);
-	return ((uint64) 1) << (pg_leftmost_one_pos64(num-1) + 1);
+	return ((uint64) 1) << ceil_log2_64(num);
 }
 
 #endif							/* PG_BITUTILS_H */
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 392fbd33845..2732953a466 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -319,3 +319,80 @@ pg_popcount(const char *buf, int bytes)
 
 	return popcnt;
 }
+
+static bool pg_lzcnt_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 0x80000001);
+#else
+#error cpuid instruction not available
+#endif
+
+	return (exx[2] & bit_ABM) != 0;	/* ABM / LZCNT */
+}
+
+static int
+__attribute__((target("lzcnt")))
+pg_fast_count_leading_zeros_32(uint32 num)
+{
+	return __builtin_clz(num);
+}
+
+static int
+pg_slow_count_leading_zeros_32(uint32 num)
+{
+	if (num == 0)
+		return 8 * sizeof(num);
+	return __builtin_clz(num);
+}
+
+
+static int
+__attribute__((target("lzcnt")))
+pg_fast_count_leading_zeros_64(uint64 num)
+{
+	return __builtin_clzll(num);
+}
+
+static int
+pg_slow_count_leading_zeros_64(uint64 num)
+{
+	if (num == 0)
+		return 8 * sizeof(num);
+	return __builtin_clzll(num);
+}
+
+static int pg_count_leading_zeros_32_choose(uint32 num)
+{
+	if (pg_lzcnt_available())
+	{
+		pg_count_leading_zeros_64 = pg_fast_count_leading_zeros_64;
+		pg_count_leading_zeros_32 = pg_fast_count_leading_zeros_32;
+		return pg_count_leading_zeros_32(num);
+	}
+	pg_count_leading_zeros_64 = pg_slow_count_leading_zeros_64;
+	pg_count_leading_zeros_32 = pg_slow_count_leading_zeros_32;
+
+	return pg_slow_count_leading_zeros_32(num);
+}
+
+static int pg_count_leading_zeros_64_choose(uint64 num)
+{
+	if (pg_lzcnt_available())
+	{
+		pg_count_leading_zeros_64 = pg_fast_count_leading_zeros_64;
+		pg_count_leading_zeros_32 = pg_fast_count_leading_zeros_32;
+		return pg_count_leading_zeros_64(num);
+	}
+	pg_count_leading_zeros_64 = pg_slow_count_leading_zeros_64;
+	pg_count_leading_zeros_32 = pg_slow_count_leading_zeros_32;
+
+	return pg_slow_count_leading_zeros_64(num);
+}
+
+int (*pg_count_leading_zeros_32)(uint32) = pg_count_leading_zeros_32_choose;
+int (*pg_count_leading_zeros_64)(uint64) = pg_count_leading_zeros_64_choose;
-- 
2.25.0

Reply via email to