On Fri, Sep 05, 2025 at 10:40:46AM +0100, Dean Rasheed wrote: > Alternatively, why not just impose the upper bound at the call sites > if needed, so there may be no need for these functions at all. For > example, looking at nodeHash.c, it would seem much more logical to > have ExecChooseHashTableSize() put an upper bound on nbuckets, rather > than capping log2_nbuckets after nbuckets has been chosen, risking > them getting out-of-sync.
Yep, that may be the best course of action. As far as I can see, this is capped by palloc() and HashJoinTuple, so we should be OK with putting a hard limit at (INT_MAX / 2) and call it a day, I guess? The two other call sites of my_log2() are worker.c, for the number of subxacts, which relies on int32. The other call site is nodeAgg.c, capped at HASHAGG_MAX_PARTITIONS (1024). As of the attached, dynahash.h can be removed, which is the minimal goal I had in mind. I am not sure about the need to tweak more dynahash.c, as we've relied on long in this file for many years. We could bite the bullet and do it, of course, but I am not sure.. So I would be happy with only the attached changes. What do you think? -- Michael
From 5c81cf6b035368ac2dc9c4b20881a9e13eff5360 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 9 Sep 2025 12:40:04 +0900 Subject: [PATCH v3 1/2] Replace callers of dynahash.h's my_log() by equivalent in pg_bitutils.h All these callers use 4-byte signed integers for their variables, hence they do not require my_log2() maximum based on int64. This eases an upcoming removal of my_log2(). ExecChooseHashTableSize(), that calculates the number of buckets to use for hash tables, is now capped at 2^30, to keep pg_ceil_log2_32() on the same side. nodeAgg.c is already capped at 1024, and worker.c relies on the maximum number of subxacts. --- src/backend/executor/nodeAgg.c | 3 +-- src/backend/executor/nodeHash.c | 14 ++++++++++---- src/backend/replication/logical/worker.c | 3 +-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 377e016d7322..a4f3d30f307c 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -267,7 +267,6 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/injection_point.h" #include "utils/logtape.h" @@ -2115,7 +2114,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize, npartitions = (int) dpartitions; /* ceil(log2(npartitions)) */ - partition_bits = my_log2(npartitions); + partition_bits = pg_ceil_log2_32(npartitions); /* make sure that we don't exhaust the hash bits */ if (partition_bits + used_bits >= 32) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa..db8f4722ebb2 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -36,7 +36,6 @@ #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "port/pg_bitutils.h" -#include "utils/dynahash.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -340,7 +339,7 @@ MultiExecParallelHash(HashState *node) */ hashtable->curbatch = -1; hashtable->nbuckets = pstate->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; /* @@ -480,7 +479,7 @@ ExecHashTableCreate(HashState *state) &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ - log2_nbuckets = my_log2(nbuckets); + log2_nbuckets = pg_ceil_log2_32(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* @@ -935,6 +934,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, Assert(nbuckets > 0); Assert(nbatch > 0); + /* + * Cap nbuckets, for power of 2 calculations. This maximum is safe + * for pg_ceil_log2_32(). + */ + if (nbuckets > PG_INT32_MAX / 2) + nbuckets = PG_INT32_MAX / 2; + *numbuckets = nbuckets; *numbatches = nbatch; } @@ -3499,7 +3505,7 @@ ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno) dsa_get_address(hashtable->area, hashtable->batches[batchno].shared->buckets); hashtable->nbuckets = hashtable->parallel_state->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->current_chunk = NULL; hashtable->current_chunk_shared = InvalidDsaPointer; hashtable->batches[batchno].at_least_one_chunk = false; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index c0f6bef5c282..c6d7c1fcde71 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -276,7 +276,6 @@ #include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" -#include "utils/dynahash.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -5109,7 +5108,7 @@ subxact_info_read(Oid subid, TransactionId xid) len = sizeof(SubXactInfo) * subxact_data.nsubxacts; /* we keep the maximum as a power of 2 */ - subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts); + subxact_data.nsubxacts_max = 1 << pg_ceil_log2_32(subxact_data.nsubxacts); /* * Allocate subxact information in the logical streaming context. We need -- 2.51.0
From c6a492debc0d4c74c26f48db18aeeb6d4db8a088 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 9 Sep 2025 12:42:48 +0900 Subject: [PATCH v3 2/2] Remove dynahash.h All the callers of my_log2() are now limited to dynahash.c, so let's remove its header. This capability is provided by pg_bitutils.h already. --- src/include/utils/dynahash.h | 20 -------------------- src/backend/utils/hash/dynahash.c | 4 ++-- 2 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 src/include/utils/dynahash.h diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h deleted file mode 100644 index a4362d3f65e5..000000000000 --- a/src/include/utils/dynahash.h +++ /dev/null @@ -1,20 +0,0 @@ -/*------------------------------------------------------------------------- - * - * dynahash.h - * POSTGRES dynahash.h file definitions - * - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * src/include/utils/dynahash.h - * - *------------------------------------------------------------------------- - */ -#ifndef DYNAHASH_H -#define DYNAHASH_H - -extern int my_log2(int64 num); - -#endif /* DYNAHASH_H */ diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1aeee5be42ac..ac94b9e93c6e 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -102,7 +102,6 @@ #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" -#include "utils/dynahash.h" #include "utils/memutils.h" @@ -281,6 +280,7 @@ static bool init_htab(HTAB *hashp, int64 nelem); pg_noreturn static void hash_corrupted(HTAB *hashp); static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr); +static int my_log2(int64 num); static int64 next_pow2_int64(int64 num); static int next_pow2_int(int64 num); static void register_seq_scan(HTAB *hashp); @@ -1813,7 +1813,7 @@ hash_corrupted(HTAB *hashp) } /* calculate ceil(log base 2) of num */ -int +static int my_log2(int64 num) { /* -- 2.51.0
signature.asc
Description: PGP signature