This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit a03d2b857a9b240326ad47560db3191f47cf503c
Author: Yao Wang <[email protected]>
AuthorDate: Fri Mar 3 15:41:19 2023 +0800

    FIXME enable 64bit bitmapset and update visimap (#14784)
    
    It includes the portion about enabling 64bit bms and other necessary
    changes related to visimap, plus updates of cmockery unit tests.
---
 .../access/appendonly/appendonly_visimap_entry.c   |  47 +++--
 src/backend/nodes/bitmapset.c                      |   3 -
 src/backend/utils/misc/bitmap_compression.c        | 114 ++++++++++-
 .../utils/misc/test/bitmap_compression_test.c      | 223 +++++++++++++++++++--
 src/include/access/appendonly_visimap.h            |   6 +
 src/include/nodes/bitmapset.h                      |  28 +--
 src/include/utils/bitmap_compression.h             |  11 +
 7 files changed, 381 insertions(+), 51 deletions(-)

diff --git a/src/backend/access/appendonly/appendonly_visimap_entry.c 
b/src/backend/access/appendonly/appendonly_visimap_entry.c
index 40427f4bc0..a605536cb5 100644
--- a/src/backend/access/appendonly/appendonly_visimap_entry.c
+++ b/src/backend/access/appendonly/appendonly_visimap_entry.c
@@ -129,7 +129,10 @@ AppendOnlyVisimap_GetAttrNotNull(HeapTuple t, TupleDesc 
td, int attr)
 void
 AppendOnlyVisiMapEnty_ReadData(AppendOnlyVisimapEntry *visiMapEntry, size_t 
dataSize)
 {
-       int                     newWordCount;
+       /* the block count of (ondisk) bitstream */
+       int                     onDiskBlockCount;
+       /* the word count of in-memory bitmapset */
+       int                     bmsWordCount;
 
        Assert(visiMapEntry);
        Assert(CurrentMemoryContext == visiMapEntry->memoryContext);
@@ -156,21 +159,23 @@ AppendOnlyVisiMapEnty_ReadData(AppendOnlyVisimapEntry 
*visiMapEntry, size_t data
         * but I think it is reasonable to set it to NULLL to avoid similar 
issues.
         */
        visiMapEntry->bitmap = NULL;
-       newWordCount =
-               BitmapDecompress_GetBlockCount(&decompressState);
-       if (newWordCount > 0)
+       BitmapDecompress_CalculateBlockCounts(&decompressState,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+
+       if (onDiskBlockCount > 0)
        {
                visiMapEntry->bitmap = palloc0(offsetof(Bitmapset, words) +
-                                                                          
(newWordCount * sizeof(bitmapword)));
-               visiMapEntry->bitmap->nwords = newWordCount;
+                                                                          
(bmsWordCount * sizeof(bitmapword)));
+               visiMapEntry->bitmap->nwords = bmsWordCount;
                BitmapDecompress_Decompress(&decompressState,
-                                                                       
visiMapEntry->bitmap->words,
-                                                                       
newWordCount);
+                                                                       (uint32 
*)visiMapEntry->bitmap->words,
+                                                                       
onDiskBlockCount);
        }
-       else if (newWordCount != 0)
+       else if (onDiskBlockCount != 0)
        {
                elog(ERROR,
-                        "illegal visimap block count: visimap block count %d", 
newWordCount);
+                        "illegal visimap block count: visimap block count %d", 
onDiskBlockCount);
        }
 
 }
@@ -264,23 +269,32 @@ AppendOnlyVisimapEntry_GetHiddenTupleCount(
 void
 AppendOnlyVisimapEntry_WriteData(AppendOnlyVisimapEntry *visiMapEntry)
 {
-       int                     bitmapSize,
-                               compressedBitmapSize;
+       /* bitmap size, in bytes */
+       int     bitmapSize;
+       int     compressedBitmapSize;
+       /* word count in 64bit or 32bit words for in-memory bms */
+       int     bmsWordCount;
+       /* block count always in 32bit (after conversion if necessary) */
+       int     blockCount;
 
        Assert(visiMapEntry);
        Assert(CurrentMemoryContext == visiMapEntry->memoryContext);
        Assert(AppendOnlyVisimapEntry_IsValid(visiMapEntry));
 
-       bitmapSize = (visiMapEntry->bitmap ? (visiMapEntry->bitmap->nwords * 
sizeof(bitmapword)) : 0);
+       BitmapCompress_CalculateBlockCounts(visiMapEntry->bitmap,
+                                                                               
&blockCount,
+                                                                               
&bmsWordCount);
+       bitmapSize = sizeof(uint32) * blockCount;
        bitmapSize += BITMAP_COMPRESSION_HEADER_SIZE;
+       Assert(bmsWordCount <= APPENDONLY_VISIMAP_MAX_BITMAP_WORD_COUNT);
 
        Assert(visiMapEntry->data);
        Assert(APPENDONLY_VISIMAP_DATA_BUFFER_SIZE >= bitmapSize);
        visiMapEntry->data->version = 1;
 
        compressedBitmapSize = Bitmap_Compress(BITMAP_COMPRESSION_TYPE_DEFAULT,
-                                                                               
   (visiMapEntry->bitmap ? visiMapEntry->bitmap->words : NULL),
-                                                                               
   (visiMapEntry->bitmap ? visiMapEntry->bitmap->nwords : 0),
+                                                                               
   (visiMapEntry->bitmap ? (uint32*)visiMapEntry->bitmap->words : NULL),
+                                                                               
   blockCount,
                                                                                
   visiMapEntry->data->data,
                                                                                
   bitmapSize);
        Assert(compressedBitmapSize >= BITMAP_COMPRESSION_HEADER_SIZE);
@@ -499,6 +513,9 @@ AppendOnlyVisimapEntry_GetMinimalSizeToCover(int64 offset)
        minSize |= minSize >> 8;
        minSize |= minSize >> 16;
        minSize++;
+
+       Assert(minSize <= APPENDONLY_VISIMAP_MAX_BITMAP_WORD_COUNT);
+
        return minSize;
 }
 
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 8edd4966fe..0f01bcf8cc 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -29,9 +29,6 @@
 #define WORDNUM(x)     ((x) / BITS_PER_BITMAPWORD)
 #define BITNUM(x)      ((x) % BITS_PER_BITMAPWORD)
 
-#define BITMAPSET_SIZE(nwords) \
-       (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
-
 /*----------
  * This is a well-known cute trick for isolating the rightmost one-bit
  * in a word.  It assumes two's complement arithmetic.  Consider any
diff --git a/src/backend/utils/misc/bitmap_compression.c 
b/src/backend/utils/misc/bitmap_compression.c
index 5126d14041..133fa6297e 100644
--- a/src/backend/utils/misc/bitmap_compression.c
+++ b/src/backend/utils/misc/bitmap_compression.c
@@ -15,6 +15,7 @@
 #include "utils/bitmap_compression.h"
 #include "utils/bitstream.h"
 #include "utils/guc.h"
+#include "access/appendonly_visimap.h"
 
 typedef enum BitmapCompressionFlag
 {
@@ -80,9 +81,15 @@ BitmapDecompress_HasError(
 }
 
 /*
- * Performs the bitmap decompression.
+ * Perform bitmap decompression into in-memory buffer
  *
- * bitmapDataSize in uint32-words.
+ * bitmap: caller-allocated buffer that can hold state->blockCount
+ * number of 32-bit on-disk bitmap words.
+ *
+ * For both 32-bit and 64-bit in-memory bitmap word sizes, we write
+ * 32-bit words into the in-memory buffer contiguously. This is safe
+ * to do as we interpret two contiguous 32-bit words as one 64-bit
+ * word.
  */
 void
 BitmapDecompress_Decompress(BitmapDecompressState *state,
@@ -105,7 +112,7 @@ BitmapDecompress_Decompress(BitmapDecompressState *state,
 
        if (state->compressionType == BITMAP_COMPRESSION_TYPE_NO)
        {
-               memcpy(bitmap, 
+               memcpy(bitmap,
                                Bitstream_GetAlignedData(&state->bitstream, 
16), 
                                state->blockCount * sizeof(uint32));
        }
@@ -357,3 +364,104 @@ Bitmap_Compress(
                        return 0;
        }
 }
+
+/*
+ * Calculate two counts for decompress:
+ * 1. 'onDiskBlockCount': the block count of (ondisk) bitstream
+ * 2. 'bmsWordCount': the word count of in-memory bitmapset
+ */
+void BitmapDecompress_CalculateBlockCounts(BitmapDecompressState 
*decompressState,
+                                                                               
   int *onDiskBlockCount,
+                                                                               
   int *bmsWordCount)
+{
+       *onDiskBlockCount =
+               BitmapDecompress_GetBlockCount(decompressState);
+
+       /* The on-disk bitmap representation always uses 32-bit block size
+        * (for backward compatibility). Depending on the environment, we
+        * may be using either 64-bit words or 32-bit words for the
+        * in-memory representation.
+        * So, if (in-memory) bitmapset uses 64 bit words, we can use half
+        * of the on-disk bitmap block count.
+        */
+       if (BITS_PER_BITMAPWORD == 64)
+       {
+               /*
+                * Number of on-disk blocks is always 0, 1 or even.
+                * See resizing logic in AppendOnlyVisimapEntry_HideTuple()
+                */
+               if (*onDiskBlockCount == 1)
+                       *bmsWordCount = 1;
+               else
+               {
+                       Assert(*onDiskBlockCount % 2 == 0);
+                       *bmsWordCount = *onDiskBlockCount / 2;
+               }
+       }
+       else
+       {
+               Assert(BITS_PER_BITMAPWORD == 32);
+               *bmsWordCount = *onDiskBlockCount;
+       }
+       Assert(*bmsWordCount <= APPENDONLY_VISIMAP_MAX_BITMAP_WORD_COUNT);
+       Assert(*bmsWordCount >= 0);
+}
+
+/*
+ * Calculate two counts for compress:
+ * 1. 'onDiskBlockCount': the block count of (ondisk) bitstream
+ * 2. 'bmsWordCount': the word count of in-memory bitmapset
+ */
+void BitmapCompress_CalculateBlockCounts(Bitmapset *bitmap,
+                                                                               
 int *onDiskBlockCount,
+                                                                               
 int *bmsWordCount)
+{
+       *onDiskBlockCount = 0;
+       *bmsWordCount = 0;
+
+       if (bitmap)
+       {
+               *bmsWordCount = bitmap->nwords;
+
+               /*
+                * In a 64-bit environment the in-memory bms uses 64-bit words,
+                * but on-disk blocks stay 32-bit for backward compatibility.
+                * We therefore convert the in-memory word count and pass a
+                * 32-bit block count to Bitmap_Compress().
+                */
+               if (BITS_PER_BITMAPWORD == 64)
+               {
+                       /*
+                        * On 64bit env, if there is only one 64 bit word in 
memory, and the
+                        * 32 higher order bits of that word are all zero, it 
implies that
+                        * there is only one 32 bit word. We can always assume 
that the 32
+                        * higher order bits of a 64 bit bitmap word are zeroed 
out - this
+                        * is ensured by routines such as bms_add_member() and
+                        * AppendOnlyVisiMapEnty_ReadData().
+                        */
+                       if (*bmsWordCount == 1
+                               && (bitmap->words[0] >> 32) == 0)
+                       {
+                               *onDiskBlockCount = 1;
+                       }
+                       else
+                       {
+                               /*
+                                * onDiskBlockCount required by 
Bitmap_Compress() is always in
+                                * uint32-words. So, if bitmapset uses 64 bit 
words, double
+                                * the value of bmsWordCount.
+                                */
+                               *onDiskBlockCount = bitmap->nwords * 2;
+                       }
+               }
+               else
+               {
+                       Assert(BITS_PER_BITMAPWORD == 32);
+
+                       /*
+                        * On 32bit env, onDiskBlockCount is always equal to 
bmsWordCount.
+                        */
+                       *onDiskBlockCount = bitmap->nwords;
+               }
+       }
+}
diff --git a/src/backend/utils/misc/test/bitmap_compression_test.c 
b/src/backend/utils/misc/test/bitmap_compression_test.c
index d0de520d53..c70aa602c5 100644
--- a/src/backend/utils/misc/test/bitmap_compression_test.c
+++ b/src/backend/utils/misc/test/bitmap_compression_test.c
@@ -17,9 +17,29 @@ test__BitmapCompression__ZeroBitmap(void **state)
        unsigned char output[20];
        memset(output, 0, 20);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 2 : 4;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 4);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(4, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                BITMAP_COMPRESSION_TYPE_DEFAULT, 
-               bitmap, 4,
+               bitmap, onDiskBlockCount,
                output, 20);
        assert_true(r < sizeof(uint32) * 4 && r >= 0);
        uint32 bitmap2[4];
@@ -32,8 +52,14 @@ test__BitmapCompression__ZeroBitmap(void **state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(4, BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(4, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, 4);
+               bitmap2, onDiskBlockCount);
        assert_memory_equal(bitmap, bitmap2, sizeof(uint32) * 4);
 }
 
@@ -51,9 +77,29 @@ test__BitmapCompression__Raw(void **state)
        unsigned char output[sizeof(uint32) * 5];
        memset(output, 0, sizeof(uint32) * 5);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 2 : 4;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 4);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(4, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                BITMAP_COMPRESSION_TYPE_DEFAULT, 
-               bitmap, blockCount,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 5);
        assert_true(r < sizeof(uint32) * blockCount && r >= 0);
        uint32 bitmap2[4];
@@ -66,8 +112,14 @@ test__BitmapCompression__Raw(void **state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(blockCount, 
BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(blockCount, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, blockCount);
+               bitmap2, onDiskBlockCount);
        assert_memory_equal(bitmap, bitmap2, sizeof(uint32) * blockCount);
 }
 
@@ -85,9 +137,29 @@ test__BitmapCompression__ExplicitNoCompression(void **state)
        unsigned char output[sizeof(uint32) * 5];
        memset(output, 0, sizeof(uint32) * 5);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 2 : 4;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 4);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(4, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                        BITMAP_COMPRESSION_TYPE_NO, 
-               bitmap, blockCount,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 5);
        assert_int_equal(r, (sizeof(uint32) * 4) + 2);
 
@@ -101,8 +173,14 @@ test__BitmapCompression__ExplicitNoCompression(void 
**state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(blockCount, 
BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(blockCount, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, blockCount);
+               bitmap2, onDiskBlockCount);
        assert_memory_equal(bitmap, bitmap2, sizeof(uint32) * blockCount);
 }
 
@@ -116,9 +194,18 @@ 
test__BitmapCompression__ExplicitNoCompressionNoBlocks(void **state)
        unsigned char output[sizeof(uint32) * 5];
        memset(output, 0, sizeof(uint32) * 5);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+
+       BitmapCompress_CalculateBlockCounts(NULL,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(0, onDiskBlockCount);
+       assert_int_equal(0, bmsWordCount);
+
        int r = Bitmap_Compress(
                        BITMAP_COMPRESSION_TYPE_NO, 
-               bitmap, blockCount,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 5);
        assert_int_equal(r, 2);
 
@@ -132,8 +219,14 @@ 
test__BitmapCompression__ExplicitNoCompressionNoBlocks(void **state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(blockCount, 
BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(blockCount, onDiskBlockCount);
+       assert_int_equal(0, bmsWordCount);
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, blockCount);
+               bitmap2, onDiskBlockCount);
 }
 
 static void
@@ -150,9 +243,29 @@ test__BitmapCompression__ImplicitNoCompression(void 
**state)
        unsigned char output[18];
        memset(output, 0, 18);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 2 : 4;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 4);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(4, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                        BITMAP_COMPRESSION_TYPE_DEFAULT, 
-               bitmap, blockCount,
+               bitmap, onDiskBlockCount,
                output, 18);
        assert_int_equal(r, (sizeof(uint32) * 4) + 2);
 
@@ -166,8 +279,14 @@ test__BitmapCompression__ImplicitNoCompression(void 
**state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(blockCount, 
BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(blockCount, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, blockCount);
+               bitmap2, onDiskBlockCount);
        assert_memory_equal(bitmap, bitmap2, sizeof(uint32) * blockCount);
 }
 
@@ -196,9 +315,29 @@ test__BitmapCompression__MultipleTypeBitmap(void **state)
        unsigned char output[sizeof(uint32) * 17];
        memset(output, 0, sizeof(uint32) * 17);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 8 : 16;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 4);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(16, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                BITMAP_COMPRESSION_TYPE_DEFAULT, 
-               bitmap, 16,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 17);
        assert_true(r < sizeof(uint32) * 16 && r >= 0);
        uint32 bitmap2[16];
@@ -211,8 +350,14 @@ test__BitmapCompression__MultipleTypeBitmap(void **state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
        assert_int_equal(16, BitmapDecompress_GetBlockCount(&decomp_state));
 
+       BitmapDecompress_CalculateBlockCounts(&decomp_state,
+                                                                               
  &onDiskBlockCount,
+                                                                               
  &bmsWordCount);
+       assert_int_equal(16, onDiskBlockCount);
+       assert_int_equal(16, BitmapDecompress_GetBlockCount(&decomp_state));
+
        BitmapDecompress_Decompress(&decomp_state,
-               bitmap2, 16);
+               bitmap2, onDiskBlockCount);
        assert_memory_equal(bitmap, bitmap2, sizeof(uint32) * 16);
 }
 
@@ -241,9 +386,29 @@ test__BitmapCompression_ShortDecompress(void **state)
        unsigned char output[sizeof(uint32) * 17];
        memset(output, 0, sizeof(uint32) * 17);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * For 64bit bms, bmsWordCount is half of onDiskBlockCount;
+        * For 32bit bms, bmsWordCount is equal to onDiskBlockCount.
+        */
+       int expectedBmwWordCount = BITS_PER_BITMAPWORD == 64 ? 8 : 16;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32) * 16);
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(16, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        int r = Bitmap_Compress(
                BITMAP_COMPRESSION_TYPE_DEFAULT, 
-               bitmap, 16,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 17);
        assert_true(r < sizeof(uint32) * 16 && r >= 0);
        uint32 bitmap2[16];
@@ -265,9 +430,16 @@ test__BitmapCompression_ShortDecompress(void **state)
                        BitmapDecompress_GetCompressionType(&decomp_state));
                assert_int_equal(16, 
BitmapDecompress_GetBlockCount(&decomp_state));
 
+               BitmapDecompress_CalculateBlockCounts(
+                       &decomp_state,
+                       &onDiskBlockCount,
+                       &bmsWordCount);
+               assert_int_equal(16, onDiskBlockCount);
+               assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
                PG_TRY();
                {
-                       BitmapDecompress_Decompress(&decomp_state, bitmap2, 16);
+                       BitmapDecompress_Decompress(&decomp_state, bitmap2, 
onDiskBlockCount);
                        assert_true(false); /*should not be reached */
                }
                PG_CATCH();
@@ -281,18 +453,37 @@ test__BitmapCompression_ShortDecompress(void **state)
 static void
 test__BitmapCompression__IllegalCompressionType(void **state)
 {
-       int blockCount = 0;
        uint32 bitmap[1];
        memset(bitmap, 0, sizeof(uint32) * 1);
 
        unsigned char output[sizeof(uint32) * 5];
        memset(output, 0, sizeof(uint32) * 5);
 
+       int onDiskBlockCount = 0;
+       int bmsWordCount = 0;
+       /*
+        * When onDiskBlockCount is 1, bmsWordCount is always 1 for
+        * both 64bit and 32bit bms.
+        */
+       int expectedBmwWordCount = 1;
+       Bitmapset *bms;
+
+       /* fake a bitmapset with the bitmap data */
+       bms = (Bitmapset *) palloc0(BITMAPSET_SIZE(expectedBmwWordCount));
+       bms->nwords = expectedBmwWordCount;
+       memcpy(bms->words, bitmap, sizeof(uint32));
+
+       BitmapCompress_CalculateBlockCounts(bms,
+                                                                               
&onDiskBlockCount,
+                                                                               
&bmsWordCount);
+       assert_int_equal(1, onDiskBlockCount);
+       assert_int_equal(expectedBmwWordCount, bmsWordCount);
+
        PG_TRY();
        {
                Bitmap_Compress(
                14, 
-               bitmap, blockCount,
+               bitmap, onDiskBlockCount,
                output, sizeof(uint32) * 5);
                assert_true(false); /*should not be reached */
        }
diff --git a/src/include/access/appendonly_visimap.h 
b/src/include/access/appendonly_visimap.h
index c8d00a068f..1896395031 100644
--- a/src/include/access/appendonly_visimap.h
+++ b/src/include/access/appendonly_visimap.h
@@ -36,6 +36,12 @@
 #define APPENDONLY_VISIMAP_MAX_RANGE 32768
 #define APPENDONLY_VISIMAP_MAX_BITMAP_SIZE 4096
 
+/*
+ * The max value of visiMapEntry->bitmap->nwords
+ */
+#define APPENDONLY_VISIMAP_MAX_BITMAP_WORD_COUNT \
+       (APPENDONLY_VISIMAP_MAX_BITMAP_SIZE / sizeof(bitmapword))
+
 /*
  * Data structure for the ao visibility map processing.
  *
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 9eeabf2377..4459a5ae2b 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -25,30 +25,30 @@
  */
 struct List;
 
+#define BITMAPSET_SIZE(nwords)  \
+       (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
+
 /*
  * Data representation
  *
  * Larger bitmap word sizes generally give better performance, so long as
  * they're not wider than the processor can handle efficiently.  We use
  * 64-bit words if pointers are that large, else 32-bit words.
+ *
+ * 64-bit words are disabled on big-endian machines because we lack a
+ * big-endian machine to verify them on.
+ * An unverified implementation and a lot of discussion do exist.
+ * See: https://github.com/greenplum-db/gpdb/pull/14529
+ *
+ * TODO: enable 64-bit words on big-endian machines if possible
  */
-/*
- * GPDB_12_MERGE_FIXME Disable 64-bit word size for bitmap sets.
- * Appenoptimized tables use bitmapset interface to encode tuple visibility.
- * These bitmap sets make their way to disk, inside aovisimap tuples.
- * Increasing word size will affect the ability to interpret existing
- * appendoptimized visibility data written with 32-bit word size.  Therefore,
- * we must continue to use 32-bit bitmap words or rewrite all existing
- * appendoptimized tables during upgrade (not viable).  The goal of this fixme
- * is to explore if the performance benefit of larger bitmapwords can still be
- * availed by distinguishing on-disk bitmap usage from strictly in-memory
- * bitmap usage.  E.g. define a new type bitmapword32 and use it in
- * appendoptimized code.  On a related note, tbm_bitmapword is Cloudberry
- * specific 64-bit wide type used for TID bitmaps.  Can we start using
+
+/* FIXME: tbm_bitmapword is a specific 64-bit wide type used for
+ * TID bitmaps. Since we have enabled 64-bit bms, can we start using
  * bitmapword for TID bitmaps, just like upastream, and eliminate
  * tbm_bitmapword?
  */
-#if false && SIZEOF_VOID_P >= 8
+#if ((SIZEOF_VOID_P >= 8) && (!defined WORDS_BIGENDIAN))
 
 #define BITS_PER_BITMAPWORD 64
 typedef uint64 bitmapword;             /* must be an unsigned type */
diff --git a/src/include/utils/bitmap_compression.h 
b/src/include/utils/bitmap_compression.h
index cc831dfd73..46ccc22f76 100644
--- a/src/include/utils/bitmap_compression.h
+++ b/src/include/utils/bitmap_compression.h
@@ -25,6 +25,7 @@
 #define BITMAP_COMPRESSION_H
 
 #include "utils/bitstream.h"
+#include "nodes/bitmapset.h"
 
 /*
  * The compression type, which determines which compression
@@ -109,5 +110,15 @@ int Bitmap_Compress(
                unsigned char *outData,
                int maxOutDataSize);
 
+void BitmapDecompress_CalculateBlockCounts(
+       BitmapDecompressState *decompressState,
+       int *onDiskBlockCount,
+       int *bmsWordCount);
+
+void BitmapCompress_CalculateBlockCounts(
+       Bitmapset *bitmap,
+       int *onDiskBlockCount,
+       int *bmsWordCount);
+
 #endif
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to