This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch sidx/query
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit 74766cad5554fb4e11dccf459959d50f7f57f5e9
Author: Gao Hongtao <hanahm...@gmail.com>
AuthorDate: Sat Aug 23 22:28:46 2025 +0800

    Refactor block structure: Remove elementIDs from block and related methods,
    simplifying data validation and serialization. Update tests to reflect changes
    in data handling and ensure consistency in block operations.
---
 banyand/internal/sidx/block.go      | 33 ++++++++++++---------------------
 banyand/internal/sidx/block_test.go |  7 ++-----
 banyand/internal/sidx/part.go       |  3 +--
 3 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/banyand/internal/sidx/block.go b/banyand/internal/sidx/block.go
index 45d71fd8..a0ece632 100644
--- a/banyand/internal/sidx/block.go
+++ b/banyand/internal/sidx/block.go
@@ -41,10 +41,9 @@ type block struct {
 	// Tag data organized by tag name (pointer field - 8 bytes)
 	tags map[string]*tagData // Runtime tag data with filtering
 
-	// Core data arrays (all same length - pointer fields - 24 bytes total)
-	userKeys   []int64  // User-provided ordering keys
-	elementIDs []uint64 // Unique element identifiers
-	data       [][]byte // User payload data
+	// Core data arrays (all same length - pointer fields)
+	userKeys []int64  // User-provided ordering keys
+	data     [][]byte // User payload data
 
 	// Internal state (bool field - 1 byte, padded to 8 bytes)
 	pooled bool // Whether this block came from pool
@@ -82,7 +81,6 @@ func releaseBlock(b *block) {
 // reset clears block for reuse in object pool.
 func (b *block) reset() {
 	b.userKeys = b.userKeys[:0]
-	b.elementIDs = b.elementIDs[:0]
 
 	for i := range b.data {
 		b.data[i] = b.data[i][:0]
@@ -109,10 +107,6 @@ func (b *block) mustInitFromElements(elems *elements) {
 
 	// Copy core data
 	b.userKeys = append(b.userKeys, elems.userKeys...)
-	b.elementIDs = make([]uint64, len(elems.userKeys))
-	for i := range b.elementIDs {
-		b.elementIDs[i] = uint64(i) // Generate sequential IDs
-	}
 	b.data = append(b.data, elems.data...)
 
 	// Process tags
@@ -203,9 +197,9 @@ func (b *block) processTag(tagName string, elementTags [][]tag) {
 // validate ensures block data consistency.
 func (b *block) validate() error {
 	count := len(b.userKeys)
-	if count != len(b.elementIDs) || count != len(b.data) {
-		return fmt.Errorf("inconsistent block arrays: keys=%d, ids=%d, data=%d",
-			len(b.userKeys), len(b.elementIDs), len(b.data))
+	if count != len(b.data) {
+		return fmt.Errorf("inconsistent block arrays: keys=%d, data=%d",
+			len(b.userKeys), len(b.data))
 	}
 
 	// Verify sorting by userKey
@@ -230,7 +224,7 @@ func (b *block) validate() error {
 // uncompressedSizeBytes calculates the uncompressed size of the block.
 func (b *block) uncompressedSizeBytes() uint64 {
 	count := uint64(len(b.userKeys))
-	size := count * (8 + 8) // userKey + elementID
+	size := count * 8 // userKey
 
 	// Add data payload sizes
 	for _, payload := range b.data {
@@ -274,7 +268,7 @@ func (b *block) getKeyRange() (int64, int64) {
 }
 
 // mustWriteTo writes block data to files through the provided writers.
-// This method serializes the block's userKeys, elementIDs, data, and tags
+// This method serializes the block's userKeys, data, and tags
 // to their respective files while updating the block metadata.
 func (b *block) mustWriteTo(sid common.SeriesID, bm *blockMetadata, ww *writers) {
 	if err := b.validate(); err != nil {
@@ -286,8 +280,8 @@ func (b *block) mustWriteTo(sid common.SeriesID, bm *blockMetadata, ww *writers)
 	bm.uncompressedSize = b.uncompressedSizeBytes()
 	bm.count = uint64(b.Len())
 
-	// Write user keys and element IDs to keys.bin
-	mustWriteKeysTo(&bm.keysBlock, b.userKeys, b.elementIDs, &ww.keysWriter)
+	// Write user keys to keys.bin
+	mustWriteKeysTo(&bm.keysBlock, b.userKeys, &ww.keysWriter)
 
 	// Write data payloads to data.bin
 	mustWriteDataTo(&bm.dataBlock, b.data, &ww.dataWriter)
@@ -357,8 +351,8 @@ func (b *block) mustWriteTag(tagName string, td *tagData, bm *blockMetadata, ww
 	tagMeta.size = uint64(len(bb.Buf))
 }
 
-// mustWriteKeysTo writes user keys and element IDs to the keys writer.
-func mustWriteKeysTo(kb *dataBlock, userKeys []int64, elementIDs []uint64, keysWriter *writer) {
+// mustWriteKeysTo writes user keys to the keys writer.
+func mustWriteKeysTo(kb *dataBlock, userKeys []int64, keysWriter *writer) {
 	bb := bigValuePool.Get()
 	if bb == nil {
 		bb = &bytes.Buffer{}
@@ -371,9 +365,6 @@ func mustWriteKeysTo(kb *dataBlock, userKeys []int64, elementIDs []uint64, keysW
 	// Encode user keys
 	bb.Buf, _, _ = encoding.Int64ListToBytes(bb.Buf[:0], userKeys)
 
-	// Encode element IDs
-	bb.Buf = encoding.VarUint64sToBytes(bb.Buf, elementIDs)
-
 	// Compress and write
 	compressedData := zstd.Compress(nil, bb.Buf, 1)
 	kb.offset = keysWriter.bytesWritten
diff --git a/banyand/internal/sidx/block_test.go b/banyand/internal/sidx/block_test.go
index 7158eea3..3b972a62 100644
--- a/banyand/internal/sidx/block_test.go
+++ b/banyand/internal/sidx/block_test.go
@@ -119,8 +119,8 @@ func TestBlock_Validation(t *testing.T) {
 	}
 
 	// Add inconsistent data
-	b.userKeys = append(b.userKeys, 100, 200)
-	b.elementIDs = append(b.elementIDs, 1) // Only one element ID for two keys
+	b.userKeys = append(b.userKeys, 100)
+	b.data = append(b.data, []byte("dummy"), []byte("dummy2"))
 
 	// Should fail validation
 	if err := b.validate(); err == nil {
@@ -139,7 +139,6 @@ func TestBlock_SizeCalculation(t *testing.T) {
 
 	// Add some data
 	b.userKeys = append(b.userKeys, 100, 200)
-	b.elementIDs = append(b.elementIDs, 1, 2)
 	b.data = append(b.data, []byte("test1"), []byte("test2"))
 
 	// Should have non-zero size
@@ -160,7 +159,6 @@ func TestBlock_IsFull(t *testing.T) {
 	// Add elements up to the limit
 	for i := 0; i < maxElementsPerBlock-1; i++ {
 		b.userKeys = append(b.userKeys, int64(i))
-		b.elementIDs = append(b.elementIDs, uint64(i))
 		b.data = append(b.data, []byte("data"))
 	}
 
@@ -171,7 +169,6 @@ func TestBlock_IsFull(t *testing.T) {
 
 	// Add one more element to reach the limit
 	b.userKeys = append(b.userKeys, int64(maxElementsPerBlock))
-	b.elementIDs = append(b.elementIDs, uint64(maxElementsPerBlock))
 	b.data = append(b.data, []byte("data"))
 
 	// Should now be full
diff --git a/banyand/internal/sidx/part.go b/banyand/internal/sidx/part.go
index 8570ed16..f4246e23 100644
--- a/banyand/internal/sidx/part.go
+++ b/banyand/internal/sidx/part.go
@@ -558,8 +558,7 @@ func openMemPart(mp *memPart) *part {
 // This is a utility function similar to the stream module.
 func uncompressedElementSizeBytes(index int, es *elements) uint64 {
 	// 8 bytes for user key
-	// 8 bytes for elementID
-	n := uint64(8 + 8)
+	n := uint64(8)
 
 	// Add data payload size
 	if index < len(es.data) && es.data[index] != nil {
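
A note for reviewers skimming the diff above: with elementIDs gone, a block carries only two parallel core arrays, so the consistency and sizing rules reduce to "keys and data must have the same length, keys must be sorted, and each element costs 8 bytes plus its payload". The self-contained sketch below restates that contract outside the repository; the struct, field, and method names mirror the diff, while the standalone package wrapper and the demo values in main are illustrative only, not BanyanDB code.

package main

import (
	"fmt"
	"sort"
)

// block mirrors the simplified structure from the diff: the elementIDs
// slice is gone, leaving two parallel arrays that must stay in sync.
type block struct {
	userKeys []int64  // user-provided ordering keys
	data     [][]byte // user payload data
}

// validate restates the trimmed consistency check: only the key and data
// lengths are compared, and the keys must be sorted ascending.
func (b *block) validate() error {
	if len(b.userKeys) != len(b.data) {
		return fmt.Errorf("inconsistent block arrays: keys=%d, data=%d",
			len(b.userKeys), len(b.data))
	}
	if !sort.SliceIsSorted(b.userKeys, func(i, j int) bool {
		return b.userKeys[i] < b.userKeys[j]
	}) {
		return fmt.Errorf("userKeys are not sorted")
	}
	return nil
}

// uncompressedSizeBytes reflects the new accounting: 8 bytes per user key
// (the 8 bytes previously charged per elementID are gone) plus the payloads.
func (b *block) uncompressedSizeBytes() uint64 {
	size := uint64(len(b.userKeys)) * 8
	for _, payload := range b.data {
		size += uint64(len(payload))
	}
	return size
}

func main() {
	b := &block{
		userKeys: []int64{100, 200},
		data:     [][]byte{[]byte("test1"), []byte("test2")},
	}
	if err := b.validate(); err != nil {
		panic(err)
	}
	fmt.Println(b.uncompressedSizeBytes()) // 2*8 + 5 + 5 = 26
}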
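
The keys.bin write path is likewise reduced to "encode the int64 user keys, compress, record offset and size", with no trailing element-ID section. The sketch below imitates that flow with standard-library stand-ins (varint encoding and gzip in place of the repository's encoding and zstd packages); writeKeys and dataBlock here are illustrative approximations of mustWriteKeysTo, not actual BanyanDB APIs.

package main

import (
	"bytes"
	"compress/gzip"
	"encoding/binary"
	"fmt"
)

// dataBlock mirrors the offset/size bookkeeping kept in the block metadata.
type dataBlock struct {
	offset uint64
	size   uint64
}

// writeKeys imitates the post-change mustWriteKeysTo: serialize only the
// user keys (varint-encoded as a stand-in for the repository's int64-list
// encoding), compress them (gzip standing in for zstd), append to the
// destination, and record the block's offset and compressed size.
func writeKeys(kb *dataBlock, userKeys []int64, dst *bytes.Buffer) error {
	raw := make([]byte, 0, len(userKeys)*binary.MaxVarintLen64)
	for _, k := range userKeys {
		raw = binary.AppendVarint(raw, k)
	}

	kb.offset = uint64(dst.Len())
	zw := gzip.NewWriter(dst)
	if _, err := zw.Write(raw); err != nil {
		return err
	}
	if err := zw.Close(); err != nil {
		return err
	}
	kb.size = uint64(dst.Len()) - kb.offset
	return nil
}

func main() {
	var keysFile bytes.Buffer
	var kb dataBlock
	if err := writeKeys(&kb, []int64{100, 200, 300}, &keysFile); err != nil {
		panic(err)
	}
	fmt.Printf("keys block at offset=%d size=%d\n", kb.offset, kb.size)
}

A matching reader simply reverses the two steps; nothing in keys.bin has to be skipped for element IDs anymore.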