This is an automated email from the ASF dual-hosted git repository. jiangtian pushed a commit to branch dev/1.1 in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/dev/1.1 by this push: new 5ad71628 feat: add markRange / unmarkRange / merge for high-performance bit manipulation (#575) (#580) 5ad71628 is described below commit 5ad7162893301a4ee28a0cd32bd71125e2154983 Author: Zhenyu Luo <zhe...@apache.org> AuthorDate: Mon Sep 1 10:11:07 2025 +0800 feat: add markRange / unmarkRange / merge for high-performance bit manipulation (#575) (#580) * feat: add markRange / unmarkRange / merge for high-performance bit manipulation * fix # Conflicts: # java/tsfile/src/test/java/org/apache/tsfile/utils/BitMapTest.java --- .../main/java/org/apache/tsfile/utils/BitMap.java | 90 +++++++++++++++++ .../java/org/apache/tsfile/utils/BitMapTest.java | 110 +++++++++++++++++++++ 2 files changed, 200 insertions(+) diff --git a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java index bc454295..acce6065 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java @@ -75,6 +75,34 @@ public class BitMap { bits[position / Byte.SIZE] |= BIT_UTIL[position % Byte.SIZE]; } + public void markRange(int startPosition, int length) { + if (length <= 0) { + return; + } + + if (startPosition < 0 || startPosition + length > size) { + throw new IndexOutOfBoundsException( + "startPosition " + startPosition + " + length " + length + " is out of range " + size); + } + + int bitEnd = startPosition + length - 1; + int byte0 = startPosition >>> 3; + int byte1 = bitEnd >>> 3; + + if (byte0 == byte1) { + bits[byte0] |= (byte) (((1 << length) - 1) << (startPosition & 7)); + return; + } + + bits[byte0++] |= (byte) (0xFF << (startPosition & 7)); + + while (byte0 < byte1) { + bits[byte0++] = (byte) 0xFF; + } + + bits[byte1] |= (byte) (0xFF >>> (7 - (bitEnd & 7))); + } + /** mark as 0 at all positions. */ public void reset() { Arrays.fill(bits, (byte) 0); @@ -84,6 +112,68 @@ public class BitMap { bits[position / Byte.SIZE] &= UNMARK_BIT_UTIL[position % Byte.SIZE]; } + public void unmarkRange(int startPosition, int length) { + if (length <= 0) { + return; + } + + if (startPosition < 0 || startPosition + length > size) { + throw new IndexOutOfBoundsException( + "startPosition " + startPosition + " + length " + length + " is out of range " + size); + } + + int bitEnd = startPosition + length - 1; + int byte0 = startPosition >>> 3; + int byte1 = bitEnd >>> 3; + + if (byte0 == byte1) { + bits[byte0] &= (byte) ~(((1 << length) - 1) << (startPosition & 7)); + return; + } + + bits[byte0++] &= (byte) ~(0xFF << (startPosition & 7)); + + while (byte0 < byte1) { + bits[byte0++] = 0; + } + + bits[byte1] &= (byte) (0xFF << ((bitEnd & 7) + 1)); + } + + public void merge(BitMap src, int srcStart, int destStart, int len) { + if (len <= 0) return; + if (srcStart < 0 || destStart < 0 || srcStart + len > src.size || destStart + len > this.size) { + throw new IndexOutOfBoundsException(); + } + + int done = 0; + int dstBit = destStart & 7; + while (done < len) { + int size = Math.min(len - done, 64); + long bits = extractBits(src.bits, srcStart + done, size); + int destStartByte = (destStart + done) >>> 3; + this.bits[destStartByte++] |= (byte) ((bits << dstBit) & 255L); + bits = bits >>> (8 - dstBit); + while (bits > 0L) { + this.bits[destStartByte++] |= (byte) (bits & 255L); + bits = bits >>> 8; + } + done += size; + } + } + + private long extractBits(byte[] buf, int off, int len) { + int start = off >>> 3; + int size = 8 - (off & 7); + long val = (buf[start++] & 0xFFL) >>> (off & 7); + while (size < len) { + val |= ((buf[start++] & 0xFFL) << size); + size += 8; + } + + return val & (0xffff_ffff_ffff_ffffL >>> (64 - len)); + } + /** whether all bits are zero, i.e., no Null value */ public boolean isAllUnmarked() { int j; diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/BitMapTest.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/BitMapTest.java index d441e6b1..ae6e8f22 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/utils/BitMapTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/BitMapTest.java @@ -20,6 +20,10 @@ package org.apache.tsfile.utils; import org.junit.Test; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -64,4 +68,110 @@ public class BitMapTest { assertEquals(bitmap1.isMarked(i), bitmap2.isMarked(i)); } } + + @Test + public void exhaustiveMergeTest() { + int maxLen = 96; + int maxSize = 128; + for (int i = 1; i <= maxLen; i++) { + for (int j = i; j <= maxSize; j++) { + for (int k = 0; k <= j - i; k++) { + for (int m = 0; m <= maxSize - i; m++) { + runOneCase(j, k, maxSize, m, i); + } + } + } + } + } + + private static void runOneCase(int srcSize, int srcStart, int destSize, int destStart, int len) { + Random r = new Random(); + BitMap src = new BitMap(srcSize); + BitMap dst = new BitMap(destSize); + + for (int i = 0; i < src.getSize(); i++) { + if (r.nextBoolean()) { + src.mark(i); + } + } + + for (int i = 0; i < dst.getSize(); i++) { + if (r.nextBoolean()) { + dst.mark(i); + } + } + + BitMap copy = + new BitMap(src.getSize(), Arrays.copyOf(dst.getByteArray(), dst.getByteArray().length)); + + for (int i = 0; i < len; i++) { + if (src.isMarked(srcStart + i)) { + copy.mark(destStart + i); + } + } + + dst.merge(src, srcStart, destStart, len); + assertArrayEquals(copy.getByteArray(), dst.getByteArray()); + } + + @Test + public void emptyRange() { + BitMap map = new BitMap(1); + map.markRange(0, 0); + assertEquals((byte) 0x00, map.getByteArray()[0]); + } + + @Test + public void singleByteAllBits() { + for (int i = 0; i < 8; i++) { + for (int j = 0; j <= 8 - i; j++) { + doTest(8, i, j); + } + } + } + + @Test + public void twoBytesHeadTail() { + for (int i = 0; i < 64; i++) { + for (int j = 0; j <= 64 - i; j += 8) { + doTest(64, i, j); + } + } + } + + @Test + public void twoBytesPartialHead() { + for (int i = 0; i < 64; i += 8) { + for (int j = 0; j <= 64 - i; j += 8) { + doTest(64, i, j); + } + } + } + + @Test + public void twoBytesPartialTail() { + int size = 64; + for (int i = 0; i < size; i += 8) { + for (int j = 1; j <= size - i; j++) { + doTest(size, i, j); + } + } + } + + private void doTest(int size, int start, int length) { + BitMap map = new BitMap(size); + BitMap bitMap = new BitMap(size); + map.markRange(start, length); + for (int i = start; i < start + length; i++) { + bitMap.mark(i); + } + assertArrayEquals(bitMap.getByteArray(), map.getByteArray()); + + map.unmarkRange(start, length); + for (int i = start; i < start + length; i++) { + bitMap.unmark(i); + } + System.out.println(start + " " + length); + assertArrayEquals(bitMap.getByteArray(), map.getByteArray()); + } }