Hi, In [1], David and I talked about a requirement that a user just want to unset the all bits in a Bitmapset but keep the allocated memory un-deallocated for later use. It is impossible for the current Bitmapset. So David suggested a Bitset struct for this purpose. I start this new thread so that the original thread can focus on its own purpose.
commit 0ee7e4789e58d6820e4c1ff62979910c0b01cdbb (HEAD -> s_stuck_v2) Author: yizhi.fzh <yizhi....@alibaba-inc.com> Date: Thu Jan 18 16:52:30 2024 +0800 Introduce a Bitset data struct. While Bitmapset is designed for variable-length of bits, Bitset is designed for fixed-length of bits, the fixed length must be specified at the bitset_init stage and keep unchanged at the whole lifespan. Because of this, some operations on Bitset is simpler than Bitmapset. The bitset_clear unsets all the bits but kept the allocated memory, this capacity is impossible for bit Bitmapset for some solid reasons. Also for performance aspect, the functions for Bitset removed some unlikely checks, instead with some Asserts. [1] https://www.postgresql.org/message-id/CAApHDvpdp9LyAoMXvS7iCX-t3VonQM3fTWCmhconEvORrQ%2BZYA%40mail.gmail.com Any feedback is welcome. -- Best Regards Andy Fan
>From 0ee7e4789e58d6820e4c1ff62979910c0b01cdbb Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Thu, 18 Jan 2024 16:52:30 +0800 Subject: [PATCH v1 1/1] Introduce a Bitset data struct. While Bitmapset is designed for variable-length of bits, Bitset is designed for fixed-length of bits, the fixed length must be specified at the bitset_init stage and keep unchanged at the whole lifespan. Because of this, some operations on Bitset is simpler than Bitmapset. The bitset_clear unsets all the bits but kept the allocated memory, this capacity is impossible for bit Bitmapset for some solid reasons. Also for performance aspect, the functions for Bitset removed some unlikely checks, instead with some Asserts. --- src/backend/nodes/bitmapset.c | 180 +++++++++++++++++++++++++++++-- src/include/nodes/bitmapset.h | 27 +++++ src/tools/pgindent/typedefs.list | 1 + 3 files changed, 199 insertions(+), 9 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index f4b61085be..ade77afbae 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -1224,23 +1224,18 @@ bms_join(Bitmapset *a, Bitmapset *b) * It makes no difference in simple loop usage, but complex iteration logic * might need such an ability. */ -int -bms_next_member(const Bitmapset *a, int prevbit) + +static int +bms_next_member_internal(int nwords, const bitmapword *words, int prevbit) { - int nwords; int wordnum; bitmapword mask; - Assert(a == NULL || IsA(a, Bitmapset)); - - if (a == NULL) - return -2; - nwords = a->nwords; prevbit++; mask = (~(bitmapword) 0) << BITNUM(prevbit); for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++) { - bitmapword w = a->words[wordnum]; + bitmapword w = words[wordnum]; /* ignore bits before prevbit */ w &= mask; @@ -1257,9 +1252,21 @@ bms_next_member(const Bitmapset *a, int prevbit) /* in subsequent words, consider all bits */ mask = (~(bitmapword) 0); } + return -2; } +int +bms_next_member(const Bitmapset *a, int prevbit) +{ + Assert(a == NULL || IsA(a, Bitmapset)); + + if (a == NULL) + return -2; + + return bms_next_member_internal(a->nwords, a->words, prevbit); +} + /* * bms_prev_member - find prev member of a set * @@ -1365,3 +1372,158 @@ bitmap_match(const void *key1, const void *key2, Size keysize) return !bms_equal(*((const Bitmapset *const *) key1), *((const Bitmapset *const *) key2)); } + +/* + * bitset_init - create a Bitset. the set will be round up to nwords; + */ +Bitset * +bitset_init(size_t size) +{ + int nword = size + BITS_PER_BITMAPWORD - 1 / BITS_PER_BITMAPWORD; + Bitset *result; + + Assert(size > 0); + + result = (Bitset *) palloc0(sizeof(Bitset *) + nword * sizeof(bitmapword)); + result->nwords = nword; + + return result; +} + +/* + * bitset_clear - clear the bits only, but the memory is still there. + */ +void +bitset_clear(Bitset *a) +{ + Assert(a != NULL); + memset(a->words, 0, sizeof(bitmapword) * a->nwords); +} + +void +bitset_free(Bitset *a) +{ + Assert(a != NULL); + pfree(a); +} + +void +bitset_add_member(Bitset *a, int x) +{ + int wordnum, + bitnum; + + Assert(x >= 0); + + wordnum = WORDNUM(x); + bitnum = BITNUM(x); + + a->words[wordnum] |= ((bitmapword) 1 << bitnum); +} + +void +bitset_del_member(Bitset *a, int x) +{ + int wordnum, + bitnum; + + Assert(x >= 0); + + wordnum = WORDNUM(x); + bitnum = BITNUM(x); + + a->words[wordnum] &= ~((bitmapword) 1 << bitnum); +} + +int +bitset_is_member(Bitset *a, int x) +{ + int wordnum, + bitnum; + + /* used in expression engine */ + Assert(x >= 0); + + wordnum = WORDNUM(x); + bitnum = BITNUM(x); + + Assert(wordnum < a->nwords); + + return (a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0; +} + +int +bitset_next_member(Bitset *a, int prevbit) +{ + Assert(a != NULL); + + return bms_next_member_internal(a->nwords, a->words, prevbit); +} + + +/* + * bitset_replace_members_fast + * populate the a's members from b, where the a and b must have a + * equal number of words. + */ +void +bitset_replace_members_fast(Bitset *a, Bitset *b) +{ + int i; + + Assert(a != NULL); + Assert(b != NULL); + + if (a->nwords != b->nwords) + elog(ERROR, + "Only replace members with same-size is supported."); + + i = 0; + do + { + a->words[i] = b->words[i]; + } while (++i < a->nwords); +} + + +/* + * bitset_to_bitmap - build a legal bitmapset from bitset. + */ +Bitmapset * +bitset_to_bitmap(Bitset *a) +{ + int n = a->nwords - 1; + + bool found = false; /* any non-empty bits */ + Bitmapset *result; + int i; + + if (a->nwords == 0) + return NULL; + + do + { + if (a->words[n] > 0) + { + found = true; + break; + } + } while (--n >= 0); + + if (!found) + return NULL; + + result = (Bitmapset *) palloc0(BITMAPSET_SIZE(n + 1)); + result->type = T_Bitmapset; + result->nwords = n + 1; + + Assert(result->nwords <= a->nwords); + + i = 0; + do + { + result->words[i] = a->words[i]; + } while (++i < result->nwords); + + return result; +} diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index efc8890ce6..c69a457257 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -55,6 +55,24 @@ typedef struct Bitmapset bitmapword words[FLEXIBLE_ARRAY_MEMBER]; /* really [nwords] */ } Bitmapset; +/* + * While Bitmapset is designed for variable-length of bits, Bitset is + * designed for fixed-length of bits, the fixed length must be specified at + * the bitset_init stage and keep unchanged at the whole lifespan. Because + * of this, some operations on Bitset is simpler than Bitmapset. + * + * The bitset_clear unsets all the bits but kept the allocated memory, this + * capacity is impossible for bit Bitmapset for some solid reasons. + * + * Also for performance aspect, the functions for Bitset removed some + * unlikely checks, instead with some Asserts. + */ + +typedef struct Bitset +{ + int nwords; /* number of words in array */ + bitmapword words[FLEXIBLE_ARRAY_MEMBER]; /* really [nwords] */ +} Bitset; /* result of bms_subset_compare */ typedef enum @@ -123,4 +141,13 @@ extern uint32 bms_hash_value(const Bitmapset *a); extern uint32 bitmap_hash(const void *key, Size keysize); extern int bitmap_match(const void *key1, const void *key2, Size keysize); +extern Bitset *bitset_init(size_t size); +extern void bitset_clear(Bitset *a); +extern void bitset_free(Bitset *a); +extern void bitset_add_member(Bitset *a, int x); +extern void bitset_del_member(Bitset *a, int x); +extern int bitset_is_member(Bitset *a, int bit); +extern int bitset_next_member(Bitset *a, int prevbit); +extern void bitset_replace_members_fast(Bitset *a, Bitset *b); +extern Bitmapset *bitset_to_bitmap(Bitset *a); #endif /* BITMAPSET_H */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 5fd46b7bd1..6702856f47 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -267,6 +267,7 @@ BitmapOr BitmapOrPath BitmapOrState Bitmapset +Bitset Block BlockId BlockIdData -- 2.34.1