On Thu, 12 Mar 2020 at 22:59, John Naylor <john.nay...@2ndquadrant.com> wrote: > > On Thu, Mar 12, 2020 at 7:42 AM David Rowley <dgrowle...@gmail.com> wrote: > > > > I don't think Jesse's proposed solution is that great due to the > > additional function call overhead for pg_count_leading_zeros_32(). The > > (num & (num - 1)) == 0 I imagine will perform better, but I didn't > > test it. > > Right, I believe we've all landed on the same page about that. I see > two ways of doing next_power_of_2_32 without an indirect function > call, and leaving pg_leftmost_one_pos32 the same as it is now. I > haven't measured either yet (or tested for that matter):
I've attached an updated patch. It includes the modifications mentioned above to pre-check for a power of 2 number with the bit masking hack mentioned above. I also renamed the functions to be more aligned to the other functions in pg_bitutils.h I'm not convinced pg_ceil_log2_* needs the word "ceil" in there. I dropped the part of the patch that was changing longs to ints of a known size. I went on and did some additional conversion in the 0003 patch. There are more laying around the code base, but I ended up finding a bit to fix up than i had thought I would. e.g. various places that repalloc() inside a loop that is multiplying the allocation size by 2 each time. The repalloc should be done at the end, not during the loop. I thought I might come back to those some time in the future. Is anyone able to have a look at this? David
From b69f994a8f284b7931f5383a8de323a2986d86e5 Mon Sep 17 00:00:00 2001 From: "dgrow...@gmail.com" <dgrow...@gmail.com> Date: Tue, 7 Apr 2020 11:02:47 +1200 Subject: [PATCH v7 1/3] Add functions to calculate the next power of 2 There are many areas in the code where we need to determine the next highest power of 2 of a given number. We tend to always do that in an ad-hoc way each time, generally with some tight for loop which performs a bitshift left once per loop and goes until it finds a number above the given number. Here we add two generic functions which make use of the existing pg_leftmost_one_pos* functions which, when available will allow us to calculate the next power of 2 without any looping. Here we don't add any code which uses these new functions. That will be done in followup commits. --- src/include/port/pg_bitutils.h | 72 ++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 498e532308..4ca92f076d 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -129,6 +129,78 @@ pg_rightmost_one_pos64(uint64 word) #endif /* HAVE__BUILTIN_CTZ */ } +/* + * pg_nextpower2_32 + * Returns the next highest power of 2 of 'num', or 'num', if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1. + */ +static inline uint32 +pg_nextpower2_32(uint32 num) +{ + Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1); +} + +/* + * pg_nextpower2_64 + * Returns the next highest power of 2 of 'num', or 'num', if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT64_MAX / 2 + 1. + */ +static inline uint64 +pg_nextpower2_64(uint64 num) +{ + Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1); +} + +/* + * pg_ceil_log2_32 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint32 +pg_ceil_log2_32(uint32 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos32(num - 1) + 1; +} + +/* + * pg_ceil_log2_64 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint64 +pg_ceil_log2_64(uint64 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos64(num - 1) + 1; +} + /* Count the number of one-bits in a uint32 or uint64 */ extern int (*pg_popcount32) (uint32 word); extern int (*pg_popcount64) (uint64 word); -- 2.25.1
From 63cc6c0d302bf4c0f6136ae237ce1b7acfa47db6 Mon Sep 17 00:00:00 2001 From: "dgrow...@gmail.com" <dgrow...@gmail.com> Date: Tue, 7 Apr 2020 22:50:27 +1200 Subject: [PATCH v7 3/3] Modify additional power 2 calculations to use new helper functions 2nd pass of modifying various places which obtain the next power of 2 of a number and make them use the new functions added in pg_bitutils instead. --- src/backend/access/gin/ginfast.c | 12 +++--------- src/backend/executor/nodeHash.c | 8 ++------ src/backend/nodes/list.c | 15 +++++++-------- src/backend/statistics/mvdistinct.c | 10 +--------- src/backend/utils/adt/arrayfuncs.c | 9 +++------ 5 files changed, 16 insertions(+), 38 deletions(-) diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 11d7ec067a..2e41b34d8d 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -25,6 +25,7 @@ #include "catalog/pg_am.h" #include "commands/vacuum.h" #include "miscadmin.h" +#include "port/pg_bitutils.h" #include "postmaster/autovacuum.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" @@ -503,10 +504,7 @@ ginHeapTupleFastCollect(GinState *ginstate, * initially. Make it a power of 2 to avoid wasting memory when * resizing (since palloc likes powers of 2). */ - collector->lentuples = 16; - while (collector->lentuples < nentries) - collector->lentuples *= 2; - + collector->lentuples = pg_nextpower2_32(Max(16, nentries)); collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples); } else if (collector->lentuples < collector->ntuples + nentries) @@ -516,11 +514,7 @@ ginHeapTupleFastCollect(GinState *ginstate, * overflow, though we could get to a value that exceeds * MaxAllocSize/sizeof(IndexTuple), causing an error in repalloc. */ - do - { - collector->lentuples *= 2; - } while (collector->lentuples < collector->ntuples + nentries); - + collector->lentuples = pg_nextpower2_32(collector->ntuples + nentries); collector->tuples = (IndexTuple *) repalloc(collector->tuples, sizeof(IndexTuple) * collector->lentuples); } diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index b6d5084908..c881dc1de8 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -831,9 +831,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes)); dbatch = Min(dbatch, max_pointers); minbatch = (int) dbatch; - nbatch = 2; - while (nbatch < minbatch) - nbatch <<= 1; + nbatch = pg_nextpower2_32(Max(2, minbatch)); } Assert(nbuckets > 0); @@ -2272,9 +2270,7 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) * MaxAllocSize/sizeof(void *)/8, but that is not currently possible * since we limit pg_statistic entries to much less than that. */ - nbuckets = 2; - while (nbuckets <= mcvsToUse) - nbuckets <<= 1; + nbuckets = pg_nextpower2_32(mcvsToUse + 1); /* use two more bits just to help avoid collisions */ nbuckets <<= 2; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index bd0c58cd81..80fa8c84e4 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "nodes/pg_list.h" +#include "port/pg_bitutils.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -119,9 +120,7 @@ new_list(NodeTag type, int min_size) * that's more than twice the size of an existing list, so the size limits * within palloc will ensure that we don't overflow here. */ - max_size = 8; /* semi-arbitrary small power of 2 */ - while (max_size < min_size + LIST_HEADER_OVERHEAD) - max_size *= 2; + max_size = pg_nextpower2_32(Max(8, min_size + LIST_HEADER_OVERHEAD)); max_size -= LIST_HEADER_OVERHEAD; #else @@ -160,12 +159,12 @@ enlarge_list(List *list, int min_size) /* * As above, we prefer power-of-two total allocations; but here we need - * not account for list header overhead. The existing max length might - * not be a power of 2, so don't rely on that. + * not account for list header overhead. */ - new_max_len = 16; /* semi-arbitrary small power of 2 */ - while (new_max_len < min_size) - new_max_len *= 2; + + /* clamp the minimum value to 16, a semi-arbitrary small power of 2 */ + new_max_len = pg_nextpower2_32(Max(16, min_size)); + #else /* As above, don't allocate anything extra */ new_max_len = min_size; diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 977d6f3e2e..4b86f0ab2d 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -576,15 +576,7 @@ n_choose_k(int n, int k) static int num_combinations(int n) { - int k; - int ncombs = 1; - - for (k = 1; k <= n; k++) - ncombs *= 2; - - ncombs -= (n + 1); - - return ncombs; + return (1 << n) - (n + 1); } /* diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 7a4a5aaa86..11987c8f3b 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -24,6 +24,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" +#include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/arrayaccess.h" #include "utils/builtins.h" @@ -5313,9 +5314,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate, memcpy(&astate->lbs[1], lbs, ndims * sizeof(int)); /* Allocate at least enough data space for this item */ - astate->abytes = 1024; - while (astate->abytes <= ndatabytes) - astate->abytes *= 2; + astate->abytes = pg_nextpower2_32(Max(1024, ndatabytes + 1)); astate->data = (char *) palloc(astate->abytes); } else @@ -5362,9 +5361,7 @@ accumArrayResultArr(ArrayBuildStateArr *astate, * First input with nulls; we must retrospectively handle any * previous inputs by marking all their items non-null. */ - astate->aitems = 256; - while (astate->aitems <= newnitems) - astate->aitems *= 2; + astate->aitems = pg_nextpower2_32(Max(256, newnitems + 1)); astate->nullbitmap = (bits8 *) palloc((astate->aitems + 7) / 8); array_bitmap_copy(astate->nullbitmap, 0, NULL, 0, -- 2.25.1
From 79a1d94225dae859a5eb0c39062e6cb924664413 Mon Sep 17 00:00:00 2001 From: "dgrow...@gmail.com" <dgrow...@gmail.com> Date: Tue, 7 Apr 2020 17:56:54 +1200 Subject: [PATCH v7 2/3] Modify various power 2 calculations to use new helper functions First pass of modifying various places which obtain the next power of 2 of a number and make them use the new functions added in pg_bitutils instead. --- src/backend/access/hash/hashpage.c | 24 +++++++++++------------- src/backend/access/hash/hashsort.c | 3 ++- src/backend/access/hash/hashutil.c | 4 ++-- src/backend/utils/hash/dynahash.c | 14 +++++++------- src/include/lib/simplehash.h | 27 ++++----------------------- src/tools/msvc/Mkvcbuild.pm | 2 +- 6 files changed, 27 insertions(+), 47 deletions(-) diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 55d85644a4..a664ecf494 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -31,6 +31,7 @@ #include "access/hash.h" #include "access/hash_xlog.h" #include "miscadmin.h" +#include "port/pg_bitutils.h" #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/smgr.h" @@ -502,7 +503,7 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, double dnumbuckets; uint32 num_buckets; uint32 spare_index; - uint32 i; + uint32 lshift; /* * Choose the number of initial bucket pages to match the fill factor @@ -542,15 +543,12 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, metap->hashm_nmaps = 0; metap->hashm_ffactor = ffactor; metap->hashm_bsize = HashGetMaxBitmapSize(page); + /* find largest bitmap array size that will fit in page size */ - for (i = _hash_log2(metap->hashm_bsize); i > 0; --i) - { - if ((1 << i) <= metap->hashm_bsize) - break; - } - Assert(i > 0); - metap->hashm_bmsize = 1 << i; - metap->hashm_bmshift = i + BYTE_TO_BIT; + lshift = pg_leftmost_one_pos32(metap->hashm_bsize); + Assert(lshift > 0); + metap->hashm_bmsize = 1 << lshift; + metap->hashm_bmshift = lshift + BYTE_TO_BIT; Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1)); /* @@ -570,7 +568,7 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, * Set highmask as next immediate ((2 ^ x) - 1), which should be * sufficient to cover num_buckets. */ - metap->hashm_highmask = (1 << (_hash_log2(num_buckets + 1))) - 1; + metap->hashm_highmask = pg_nextpower2_32(num_buckets + 1) - 1; metap->hashm_lowmask = (metap->hashm_highmask >> 1); MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares)); @@ -659,9 +657,9 @@ restart_expand: * * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because * the calculation maxbucket+1 mustn't overflow). Currently we restrict - * to half that because of overflow looping in _hash_log2() and - * insufficient space in hashm_spares[]. It's moot anyway because an - * index with 2^32 buckets would certainly overflow BlockNumber and hence + * to half that to prevent failure of pg_ceil_log2_32() and insufficient + * space in hashm_spares[]. It's moot anyway because an index with 2^32 + * buckets would certainly overflow BlockNumber and hence * _hash_alloc_buckets() would fail, but if we supported buckets smaller * than a disk block then this would be an independent constraint. * diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c index 9cb41d62e7..2c7b5857b5 100644 --- a/src/backend/access/hash/hashsort.c +++ b/src/backend/access/hash/hashsort.c @@ -29,6 +29,7 @@ #include "commands/progress.h" #include "miscadmin.h" #include "pgstat.h" +#include "port/pg_bitutils.h" #include "utils/tuplesort.h" @@ -69,7 +70,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets) * NOTE : This hash mask calculation should be in sync with similar * calculation in _hash_init_metabuffer. */ - hspool->high_mask = (((uint32) 1) << _hash_log2(num_buckets + 1)) - 1; + hspool->high_mask = pg_nextpower2_32(num_buckets + 1) - 1; hspool->low_mask = (hspool->high_mask >> 1); hspool->max_buckets = num_buckets - 1; diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index 9efc8016bc..9770d17ff4 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -17,6 +17,7 @@ #include "access/hash.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "port/pg_bitutils.h" #include "storage/buf_internals.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -158,8 +159,7 @@ _hash_spareindex(uint32 num_bucket) { uint32 splitpoint_group; uint32 splitpoint_phases; - - splitpoint_group = _hash_log2(num_bucket); + splitpoint_group = pg_ceil_log2_32(num_bucket); if (splitpoint_group < HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE) return splitpoint_group; diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index b5381958e7..5b4b9e487f 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -87,6 +87,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" #include "utils/dynahash.h" @@ -1718,16 +1719,15 @@ hash_corrupted(HTAB *hashp) int my_log2(long num) { - int i; - long limit; - - /* guard against too-large input, which would put us into infinite loop */ + /* guard against too-large input, which would be invalid for pg_ceil_log2_*() */ if (num > LONG_MAX / 2) num = LONG_MAX / 2; - for (i = 0, limit = 1; limit < num; i++, limit <<= 1) - ; - return i; +#if SIZEOF_LONG < 8 + return pg_ceil_log2_32(num); +#else + return pg_ceil_log2_64(num); +#endif } /* calculate first power of 2 >= num, bounded to what will fit in a long */ diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h index 5a6783f653..8cb03cda6c 100644 --- a/src/include/lib/simplehash.h +++ b/src/include/lib/simplehash.h @@ -57,6 +57,8 @@ * backwards, unless they're empty or already at their optimal position. */ +#include "port/pg_bitutils.h" + /* helpers */ #define SH_MAKE_PREFIX(a) CppConcat(a,_) #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name) @@ -215,27 +217,6 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb); #ifndef SIMPLEHASH_H #define SIMPLEHASH_H -/* FIXME: can we move these to a central location? */ - -/* calculate ceil(log base 2) of num */ -static inline uint64 -sh_log2(uint64 num) -{ - int i; - uint64 limit; - - for (i = 0, limit = 1; limit < num; i++, limit <<= 1) - ; - return i; -} - -/* calculate first power of 2 >= num */ -static inline uint64 -sh_pow2(uint64 num) -{ - return ((uint64) 1) << sh_log2(num); -} - #ifdef FRONTEND #define sh_error(...) pg_log_error(__VA_ARGS__) #define sh_log(...) pg_log_info(__VA_ARGS__) @@ -259,7 +240,7 @@ SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize) size = Max(newsize, 2); /* round up size to the next power of 2, that's how bucketing works */ - size = sh_pow2(size); + size = pg_nextpower2_64(size); Assert(size <= SH_MAX_SIZE); /* @@ -434,7 +415,7 @@ SH_GROW(SH_TYPE * tb, uint32 newsize) uint32 startelem = 0; uint32 copyelem; - Assert(oldsize == sh_pow2(oldsize)); + Assert(oldsize == pg_nextpower2_64(oldsize)); Assert(oldsize != SH_MAX_SIZE); Assert(oldsize < newsize); diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 72a21dbd41..fd7d4021a5 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -81,7 +81,7 @@ my $frontend_extrasource = { }; my @frontend_excludes = ( 'pgevent', 'pg_basebackup', 'pg_rewind', 'pg_dump', - 'pg_waldump', 'scripts'); + 'pg_validatebackup', 'pg_waldump', 'scripts'); sub mkvcbuild { -- 2.25.1