Re: Use compiler intrinsics for bit ops in hash

David Rowley Tue, 07 Apr 2020 04:41:26 -0700

On Thu, 12 Mar 2020 at 22:59, John Naylor <john.nay...@2ndquadrant.com> wrote:
>
> On Thu, Mar 12, 2020 at 7:42 AM David Rowley <dgrowle...@gmail.com> wrote:
> >
> > I don't think Jesse's proposed solution is that great due to the
> > additional function call overhead for pg_count_leading_zeros_32(). The
> > (num & (num - 1)) == 0 I imagine will perform better, but I didn't
> > test it.
>
> Right, I believe we've all landed on the same page about that. I see
> two ways of doing next_power_of_2_32 without an indirect function
> call, and leaving pg_leftmost_one_pos32 the same as it is now. I
> haven't measured either yet (or tested for that matter):


I've attached an updated patch.  It includes the modifications
mentioned above to pre-check for a power of 2 number with the bit
masking hack mentioned above. I also renamed the functions to be more
aligned to the other functions in pg_bitutils.h   I'm not convinced
pg_ceil_log2_* needs the word "ceil" in there.

I dropped the part of the patch that was changing longs to ints of a
known size. I went on and did some additional conversion in the 0003
patch. There are more laying around the code base, but I ended up
finding a bit to fix up than i had thought I would. e.g. various
places that repalloc() inside a loop that is multiplying the
allocation size by 2 each time.  The repalloc should be done at the
end, not during the loop.  I thought I might come back to those some
time in the future.

Is anyone able to have a look at this?

David

From b69f994a8f284b7931f5383a8de323a2986d86e5 Mon Sep 17 00:00:00 2001
From: "dgrow...@gmail.com" <dgrow...@gmail.com>
Date: Tue, 7 Apr 2020 11:02:47 +1200
Subject: [PATCH v7 1/3] Add functions to calculate the next power of 2

There are many areas in the code where we need to determine the next
highest power of 2 of a given number.  We tend to always do that in an
ad-hoc way each time, generally with some tight for loop which performs a
bitshift left once per loop and goes until it finds a number above the
given number.

Here we add two generic functions which make use of the existing
pg_leftmost_one_pos* functions which, when available will allow us to
calculate the next power of 2 without any looping.

Here we don't add any code which uses these new functions. That will be
done in followup commits.
---
 src/include/port/pg_bitutils.h | 72 ++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 498e532308..4ca92f076d 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -129,6 +129,78 @@ pg_rightmost_one_pos64(uint64 word)
 #endif							/* HAVE__BUILTIN_CTZ */
 }
 
+/*
+ * pg_nextpower2_32
+ *		Returns the next highest power of 2 of 'num', or 'num', if it's
+ *		already a power of 2.
+ *
+ * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1.
+ */
+static inline uint32
+pg_nextpower2_32(uint32 num)
+{
+	Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1);
+
+	/*
+	 * A power 2 number has only 1 bit set.  Subtracting 1 from such a number
+	 * will turn on all previous bits resulting in no common bits being set
+	 * between num and num-1.
+	 */
+	if ((num & (num - 1)) == 0)
+		return num;				/* already power 2 */
+
+	return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1);
+}
+
+/*
+ * pg_nextpower2_64
+ *		Returns the next highest power of 2 of 'num', or 'num', if it's
+ *		already a power of 2.
+ *
+ * 'num' mustn't be 0 or be above PG_UINT64_MAX / 2  + 1.
+ */
+static inline uint64
+pg_nextpower2_64(uint64 num)
+{
+	Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1);
+
+	/*
+	 * A power 2 number has only 1 bit set.  Subtracting 1 from such a number
+	 * will turn on all previous bits resulting in no common bits being set
+	 * between num and num-1.
+	 */
+	if ((num & (num - 1)) == 0)
+		return num;				/* already power 2 */
+
+	return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1);
+}
+
+/*
+ * pg_ceil_log2_32
+ *		Returns equivalent of ceil(log2(num))
+ */
+static inline uint32
+pg_ceil_log2_32(uint32 num)
+{
+	if (num < 2)
+		return 0;
+	else
+		return pg_leftmost_one_pos32(num - 1) + 1;
+}
+
+/*
+ * pg_ceil_log2_64
+ *		Returns equivalent of ceil(log2(num))
+ */
+static inline uint64
+pg_ceil_log2_64(uint64 num)
+{
+	if (num < 2)
+		return 0;
+	else
+		return pg_leftmost_one_pos64(num - 1) + 1;
+}
+
 /* Count the number of one-bits in a uint32 or uint64 */
 extern int	(*pg_popcount32) (uint32 word);
 extern int	(*pg_popcount64) (uint64 word);
-- 
2.25.1

From 63cc6c0d302bf4c0f6136ae237ce1b7acfa47db6 Mon Sep 17 00:00:00 2001
From: "dgrow...@gmail.com" <dgrow...@gmail.com>
Date: Tue, 7 Apr 2020 22:50:27 +1200
Subject: [PATCH v7 3/3] Modify additional power 2 calculations to use new
 helper functions

2nd pass of modifying various places which obtain the next power
of 2 of a number and make them use the new functions added in
pg_bitutils instead.
---
 src/backend/access/gin/ginfast.c    | 12 +++---------
 src/backend/executor/nodeHash.c     |  8 ++------
 src/backend/nodes/list.c            | 15 +++++++--------
 src/backend/statistics/mvdistinct.c | 10 +---------
 src/backend/utils/adt/arrayfuncs.c  |  9 +++------
 5 files changed, 16 insertions(+), 38 deletions(-)

diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 11d7ec067a..2e41b34d8d 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_am.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
+#include "port/pg_bitutils.h"
 #include "postmaster/autovacuum.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
@@ -503,10 +504,7 @@ ginHeapTupleFastCollect(GinState *ginstate,
 		 * initially.  Make it a power of 2 to avoid wasting memory when
 		 * resizing (since palloc likes powers of 2).
 		 */
-		collector->lentuples = 16;
-		while (collector->lentuples < nentries)
-			collector->lentuples *= 2;
-
+		collector->lentuples = pg_nextpower2_32(Max(16, nentries));
 		collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples);
 	}
 	else if (collector->lentuples < collector->ntuples + nentries)
@@ -516,11 +514,7 @@ ginHeapTupleFastCollect(GinState *ginstate,
 		 * overflow, though we could get to a value that exceeds
 		 * MaxAllocSize/sizeof(IndexTuple), causing an error in repalloc.
 		 */
-		do
-		{
-			collector->lentuples *= 2;
-		} while (collector->lentuples < collector->ntuples + nentries);
-
+		collector->lentuples = pg_nextpower2_32(collector->ntuples + nentries);
 		collector->tuples = (IndexTuple *) repalloc(collector->tuples,
 													sizeof(IndexTuple) * collector->lentuples);
 	}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index b6d5084908..c881dc1de8 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -831,9 +831,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
 		dbatch = Min(dbatch, max_pointers);
 		minbatch = (int) dbatch;
-		nbatch = 2;
-		while (nbatch < minbatch)
-			nbatch <<= 1;
+		nbatch = pg_nextpower2_32(Max(2, minbatch));
 	}
 
 	Assert(nbuckets > 0);
@@ -2272,9 +2270,7 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
 		 * MaxAllocSize/sizeof(void *)/8, but that is not currently possible
 		 * since we limit pg_statistic entries to much less than that.
 		 */
-		nbuckets = 2;
-		while (nbuckets <= mcvsToUse)
-			nbuckets <<= 1;
+		nbuckets = pg_nextpower2_32(mcvsToUse + 1);
 		/* use two more bits just to help avoid collisions */
 		nbuckets <<= 2;
 
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index bd0c58cd81..80fa8c84e4 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -18,6 +18,7 @@
 #include "postgres.h"
 
 #include "nodes/pg_list.h"
+#include "port/pg_bitutils.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 
@@ -119,9 +120,7 @@ new_list(NodeTag type, int min_size)
 	 * that's more than twice the size of an existing list, so the size limits
 	 * within palloc will ensure that we don't overflow here.
 	 */
-	max_size = 8;				/* semi-arbitrary small power of 2 */
-	while (max_size < min_size + LIST_HEADER_OVERHEAD)
-		max_size *= 2;
+	max_size = pg_nextpower2_32(Max(8, min_size + LIST_HEADER_OVERHEAD));
 	max_size -= LIST_HEADER_OVERHEAD;
 #else
 
@@ -160,12 +159,12 @@ enlarge_list(List *list, int min_size)
 
 	/*
 	 * As above, we prefer power-of-two total allocations; but here we need
-	 * not account for list header overhead.  The existing max length might
-	 * not be a power of 2, so don't rely on that.
+	 * not account for list header overhead.
 	 */
-	new_max_len = 16;			/* semi-arbitrary small power of 2 */
-	while (new_max_len < min_size)
-		new_max_len *= 2;
+
+	/* clamp the minimum value to 16, a semi-arbitrary small power of 2 */
+	new_max_len = pg_nextpower2_32(Max(16, min_size));
+
 #else
 	/* As above, don't allocate anything extra */
 	new_max_len = min_size;
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 977d6f3e2e..4b86f0ab2d 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -576,15 +576,7 @@ n_choose_k(int n, int k)
 static int
 num_combinations(int n)
 {
-	int			k;
-	int			ncombs = 1;
-
-	for (k = 1; k <= n; k++)
-		ncombs *= 2;
-
-	ncombs -= (n + 1);
-
-	return ncombs;
+	return (1 << n) - (n + 1);
 }
 
 /*
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 7a4a5aaa86..11987c8f3b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -24,6 +24,7 @@
 #include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "optimizer/optimizer.h"
+#include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/arrayaccess.h"
 #include "utils/builtins.h"
@@ -5313,9 +5314,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate,
 		memcpy(&astate->lbs[1], lbs, ndims * sizeof(int));
 
 		/* Allocate at least enough data space for this item */
-		astate->abytes = 1024;
-		while (astate->abytes <= ndatabytes)
-			astate->abytes *= 2;
+		astate->abytes = pg_nextpower2_32(Max(1024, ndatabytes + 1));
 		astate->data = (char *) palloc(astate->abytes);
 	}
 	else
@@ -5362,9 +5361,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate,
 			 * First input with nulls; we must retrospectively handle any
 			 * previous inputs by marking all their items non-null.
 			 */
-			astate->aitems = 256;
-			while (astate->aitems <= newnitems)
-				astate->aitems *= 2;
+			astate->aitems = pg_nextpower2_32(Max(256, newnitems + 1));
 			astate->nullbitmap = (bits8 *) palloc((astate->aitems + 7) / 8);
 			array_bitmap_copy(astate->nullbitmap, 0,
 							  NULL, 0,
-- 
2.25.1

From 79a1d94225dae859a5eb0c39062e6cb924664413 Mon Sep 17 00:00:00 2001
From: "dgrow...@gmail.com" <dgrow...@gmail.com>
Date: Tue, 7 Apr 2020 17:56:54 +1200
Subject: [PATCH v7 2/3] Modify various power 2 calculations to use new helper
 functions

First pass of modifying various places which obtain the next power of 2 of
a number and make them use the new functions added in pg_bitutils instead.
---
 src/backend/access/hash/hashpage.c | 24 +++++++++++-------------
 src/backend/access/hash/hashsort.c |  3 ++-
 src/backend/access/hash/hashutil.c |  4 ++--
 src/backend/utils/hash/dynahash.c  | 14 +++++++-------
 src/include/lib/simplehash.h       | 27 ++++-----------------------
 src/tools/msvc/Mkvcbuild.pm        |  2 +-
 6 files changed, 27 insertions(+), 47 deletions(-)

diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 55d85644a4..a664ecf494 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -31,6 +31,7 @@
 #include "access/hash.h"
 #include "access/hash_xlog.h"
 #include "miscadmin.h"
+#include "port/pg_bitutils.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
 #include "storage/smgr.h"
@@ -502,7 +503,7 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
 	double		dnumbuckets;
 	uint32		num_buckets;
 	uint32		spare_index;
-	uint32		i;
+	uint32		lshift;
 
 	/*
 	 * Choose the number of initial bucket pages to match the fill factor
@@ -542,15 +543,12 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
 	metap->hashm_nmaps = 0;
 	metap->hashm_ffactor = ffactor;
 	metap->hashm_bsize = HashGetMaxBitmapSize(page);
+
 	/* find largest bitmap array size that will fit in page size */
-	for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
-	{
-		if ((1 << i) <= metap->hashm_bsize)
-			break;
-	}
-	Assert(i > 0);
-	metap->hashm_bmsize = 1 << i;
-	metap->hashm_bmshift = i + BYTE_TO_BIT;
+	lshift = pg_leftmost_one_pos32(metap->hashm_bsize);
+	Assert(lshift > 0);
+	metap->hashm_bmsize = 1 << lshift;
+	metap->hashm_bmshift = lshift + BYTE_TO_BIT;
 	Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));
 
 	/*
@@ -570,7 +568,7 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
 	 * Set highmask as next immediate ((2 ^ x) - 1), which should be
 	 * sufficient to cover num_buckets.
 	 */
-	metap->hashm_highmask = (1 << (_hash_log2(num_buckets + 1))) - 1;
+	metap->hashm_highmask = pg_nextpower2_32(num_buckets + 1) - 1;
 	metap->hashm_lowmask = (metap->hashm_highmask >> 1);
 
 	MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
@@ -659,9 +657,9 @@ restart_expand:
 	 *
 	 * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
 	 * the calculation maxbucket+1 mustn't overflow).  Currently we restrict
-	 * to half that because of overflow looping in _hash_log2() and
-	 * insufficient space in hashm_spares[].  It's moot anyway because an
-	 * index with 2^32 buckets would certainly overflow BlockNumber and hence
+	 * to half that to prevent failure of pg_ceil_log2_32() and insufficient
+	 * space in hashm_spares[].  It's moot anyway because an index with 2^32
+	 * buckets would certainly overflow BlockNumber and hence
 	 * _hash_alloc_buckets() would fail, but if we supported buckets smaller
 	 * than a disk block then this would be an independent constraint.
 	 *
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 9cb41d62e7..2c7b5857b5 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -29,6 +29,7 @@
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
 #include "utils/tuplesort.h"
 
 
@@ -69,7 +70,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 	 * NOTE : This hash mask calculation should be in sync with similar
 	 * calculation in _hash_init_metabuffer.
 	 */
-	hspool->high_mask = (((uint32) 1) << _hash_log2(num_buckets + 1)) - 1;
+	hspool->high_mask = pg_nextpower2_32(num_buckets + 1) - 1;
 	hspool->low_mask = (hspool->high_mask >> 1);
 	hspool->max_buckets = num_buckets - 1;
 
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index 9efc8016bc..9770d17ff4 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -17,6 +17,7 @@
 #include "access/hash.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
+#include "port/pg_bitutils.h"
 #include "storage/buf_internals.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
@@ -158,8 +159,7 @@ _hash_spareindex(uint32 num_bucket)
 {
 	uint32		splitpoint_group;
 	uint32		splitpoint_phases;
-
-	splitpoint_group = _hash_log2(num_bucket);
+	splitpoint_group = pg_ceil_log2_32(num_bucket);
 
 	if (splitpoint_group < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE)
 		return splitpoint_group;
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index b5381958e7..5b4b9e487f 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -87,6 +87,7 @@
 
 #include "access/xact.h"
 #include "common/hashfn.h"
+#include "port/pg_bitutils.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
 #include "utils/dynahash.h"
@@ -1718,16 +1719,15 @@ hash_corrupted(HTAB *hashp)
 int
 my_log2(long num)
 {
-	int			i;
-	long		limit;
-
-	/* guard against too-large input, which would put us into infinite loop */
+	/* guard against too-large input, which would be invalid for pg_ceil_log2_*() */
 	if (num > LONG_MAX / 2)
 		num = LONG_MAX / 2;
 
-	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
-		;
-	return i;
+#if SIZEOF_LONG < 8
+	return pg_ceil_log2_32(num);
+#else
+	return pg_ceil_log2_64(num);
+#endif
 }
 
 /* calculate first power of 2 >= num, bounded to what will fit in a long */
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 5a6783f653..8cb03cda6c 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -57,6 +57,8 @@
  *	  backwards, unless they're empty or already at their optimal position.
  */
 
+#include "port/pg_bitutils.h"
+
 /* helpers */
 #define SH_MAKE_PREFIX(a) CppConcat(a,_)
 #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
@@ -215,27 +217,6 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb);
 #ifndef SIMPLEHASH_H
 #define SIMPLEHASH_H
 
-/* FIXME: can we move these to a central location? */
-
-/* calculate ceil(log base 2) of num */
-static inline uint64
-sh_log2(uint64 num)
-{
-	int			i;
-	uint64		limit;
-
-	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
-		;
-	return i;
-}
-
-/* calculate first power of 2 >= num */
-static inline uint64
-sh_pow2(uint64 num)
-{
-	return ((uint64) 1) << sh_log2(num);
-}
-
 #ifdef FRONTEND
 #define sh_error(...) pg_log_error(__VA_ARGS__)
 #define sh_log(...) pg_log_info(__VA_ARGS__)
@@ -259,7 +240,7 @@ SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
 	size = Max(newsize, 2);
 
 	/* round up size to the next power of 2, that's how bucketing works */
-	size = sh_pow2(size);
+	size = pg_nextpower2_64(size);
 	Assert(size <= SH_MAX_SIZE);
 
 	/*
@@ -434,7 +415,7 @@ SH_GROW(SH_TYPE * tb, uint32 newsize)
 	uint32		startelem = 0;
 	uint32		copyelem;
 
-	Assert(oldsize == sh_pow2(oldsize));
+	Assert(oldsize == pg_nextpower2_64(oldsize));
 	Assert(oldsize != SH_MAX_SIZE);
 	Assert(oldsize < newsize);
 
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 72a21dbd41..fd7d4021a5 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -81,7 +81,7 @@ my $frontend_extrasource = {
 };
 my @frontend_excludes = (
 	'pgevent',    'pg_basebackup', 'pg_rewind', 'pg_dump',
-	'pg_waldump', 'scripts');
+	'pg_validatebackup', 'pg_waldump', 'scripts');
 
 sub mkvcbuild
 {
-- 
2.25.1

Re: Use compiler intrinsics for bit ops in hash

Reply via email to