On Wed, Aug 27, 2025 at 10:00:16AM +0200, Peter Eisentraut wrote:
> That seems highly confusing. What is the meaning of the "32" then?
>
> If you need 64-bit behavior, use the variant with "64" in the name.
/*
 * Compute 1 << my_log2(num), after capping the input at INT_MAX / 2 so
 * that the shifted result still fits in an int.
 */
static int
next_pow2_int(int64 num)
{
	/* Cap the input first; larger values all map to the same result. */
	int64		bounded = (num > INT_MAX / 2) ? INT_MAX / 2 : num;

	return 1 << my_log2(bounded);
}
The pain point for me is the assumption of this routine on HEAD and
older branches, leading to a more protective overflow pattern for the
number of partitions calculated. I don't see an elegant way to keep
the same calculations for the "next power" routines while making the
int32 flavor more compliant with the fact that it may have an int64
argument (long previously), because it would mean that we would
underestimate the number returned here each time "num" is higher than
(INT_MAX / 2). That's quite dangerous when applied to dynahash.c,
which is a layer that extensions like to use. That would lead to doubling
the number of "next power" routines in pg_bitutils.h, which is not
cool in the long-term because it would facilitate incorrect uses.
So, taking a step back, I don't know what would be a good fit for
these duplicates of the "next power" routines upper-bounded on input
when attached to pg_bitutils.h. However, I do see that we can get rid
of my_log2() and dynahash.h with a consistent interface in
pg_bitutils.h, by reducing my proposal to the introduction of
pg_ceil_log2_32_bound() and pg_ceil_log2_64_bound().
At the end, next_pow2_int64() and next_pow2_int() are a lesser deal to
me, being static to dynahash.c. With that in mind, I am finishing
with the attached. It is less ambitious, but still a nice cleanup IMO.
What do you think?
--
Michael
From c1f1dd163f671185bc0fba86acdd0b108007b711 Mon Sep 17 00:00:00 2001 From: Michael Paquier <[email protected]> Date: Mon, 1 Sep 2025 12:23:44 +0900 Subject: [PATCH v2] Clean up my_log2() in dynahash.c, adding equivalents to bitutils.h --- src/include/port/pg_bitutils.h | 34 ++++++++++++++++++++++++ src/include/utils/dynahash.h | 20 -------------- src/backend/executor/nodeAgg.c | 3 +-- src/backend/executor/nodeHash.c | 7 +++-- src/backend/replication/logical/worker.c | 3 +-- src/backend/utils/hash/dynahash.c | 23 +++------------- 6 files changed, 43 insertions(+), 47 deletions(-) delete mode 100644 src/include/utils/dynahash.h diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index c7901bf8ddc0..5354d152a116 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -276,6 +276,40 @@ pg_ceil_log2_64(uint64 num) return pg_leftmost_one_pos64(num - 1) + 1; } +/* + * pg_ceil_log2_32_bound + * Returns equivalent of ceil(log2(num)), with overflow safeguard + * for pg_leftmost_one_pos32. + */ +static inline uint32 +pg_ceil_log2_32_bound(uint32 num) +{ + if (num > PG_INT32_MAX / 2) + num = PG_INT32_MAX / 2; + + if (num < 2) + return 0; + else + return pg_leftmost_one_pos32(num - 1) + 1; +} + +/* + * pg_ceil_log2_64_bound + * Returns equivalent of ceil(log2(num)), with overflow safeguard + * for pg_leftmost_one_pos64. + */ +static inline uint64 +pg_ceil_log2_64_bound(uint64 num) +{ + if (num > PG_INT64_MAX / 2) + num = PG_INT64_MAX / 2; + + if (num < 2) + return 0; + else + return pg_leftmost_one_pos64(num - 1) + 1; +} + /* * With MSVC on x86_64 builds, try using native popcnt instructions via the * __popcnt and __popcnt64 intrinsics. 
These don't work the same as GCC's diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h deleted file mode 100644 index a4362d3f65e5..000000000000 --- a/src/include/utils/dynahash.h +++ /dev/null @@ -1,20 +0,0 @@ -/*------------------------------------------------------------------------- - * - * dynahash.h - * POSTGRES dynahash.h file definitions - * - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * src/include/utils/dynahash.h - * - *------------------------------------------------------------------------- - */ -#ifndef DYNAHASH_H -#define DYNAHASH_H - -extern int my_log2(int64 num); - -#endif /* DYNAHASH_H */ diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 377e016d7322..b5fa9af9d992 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -267,7 +267,6 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/injection_point.h" #include "utils/logtape.h" @@ -2115,7 +2114,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize, npartitions = (int) dpartitions; /* ceil(log2(npartitions)) */ - partition_bits = my_log2(npartitions); + partition_bits = pg_ceil_log2_64_bound(npartitions); /* make sure that we don't exhaust the hash bits */ if (partition_bits + used_bits >= 32) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa..14d934ab42b2 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -36,7 +36,6 @@ #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "port/pg_bitutils.h" -#include "utils/dynahash.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -340,7 +339,7 @@ MultiExecParallelHash(HashState *node) */ 
hashtable->curbatch = -1; hashtable->nbuckets = pstate->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_64_bound(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; /* @@ -480,7 +479,7 @@ ExecHashTableCreate(HashState *state) &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ - log2_nbuckets = my_log2(nbuckets); + log2_nbuckets = pg_ceil_log2_64_bound(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* @@ -3499,7 +3498,7 @@ ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno) dsa_get_address(hashtable->area, hashtable->batches[batchno].shared->buckets); hashtable->nbuckets = hashtable->parallel_state->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_64_bound(hashtable->nbuckets); hashtable->current_chunk = NULL; hashtable->current_chunk_shared = InvalidDsaPointer; hashtable->batches[batchno].at_least_one_chunk = false; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 22ad9051db3f..664db8096b26 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -268,7 +268,6 @@ #include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" -#include "utils/dynahash.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -4911,7 +4910,7 @@ subxact_info_read(Oid subid, TransactionId xid) len = sizeof(SubXactInfo) * subxact_data.nsubxacts; /* we keep the maximum as a power of 2 */ - subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts); + subxact_data.nsubxacts_max = 1 << pg_ceil_log2_64_bound(subxact_data.nsubxacts); /* * Allocate subxact information in the logical streaming context. 
We need diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1aeee5be42ac..6a86572ee61d 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -102,7 +102,6 @@ #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" -#include "utils/dynahash.h" #include "utils/memutils.h" @@ -547,7 +546,7 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) if (flags & HASH_SEGMENT) { hctl->ssize = info->ssize; - hctl->sshift = my_log2(info->ssize); + hctl->sshift = pg_ceil_log2_64_bound(info->ssize); /* ssize had better be a power of 2 */ Assert(hctl->ssize == (1L << hctl->sshift)); } @@ -1812,26 +1811,12 @@ hash_corrupted(HTAB *hashp) elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname); } -/* calculate ceil(log base 2) of num */ -int -my_log2(int64 num) -{ - /* - * guard against too-large input, which would be invalid for - * pg_ceil_log2_*() - */ - if (num > PG_INT64_MAX / 2) - num = PG_INT64_MAX / 2; - - return pg_ceil_log2_64(num); -} - /* calculate first power of 2 >= num, bounded to what will fit in a int64 */ static int64 next_pow2_int64(int64 num) { - /* my_log2's internal range check is sufficient */ - return 1L << my_log2(num); + /* pg_ceil_log2_64_bound's internal range check is sufficient */ + return 1L << pg_ceil_log2_64_bound(num); } /* calculate first power of 2 >= num, bounded to what will fit in an int */ @@ -1840,7 +1825,7 @@ next_pow2_int(int64 num) { if (num > INT_MAX / 2) num = INT_MAX / 2; - return 1 << my_log2(num); + return 1 << pg_ceil_log2_32_bound(num); } -- 2.51.0
signature.asc
Description: PGP signature
