This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new b53da94ee2 [core] Fix BSI reader predicate pruning for Long.MIN_VALUE boundary (#8150)
b53da94ee2 is described below
commit b53da94ee277bd5cc7aaf83af8f5f2a82d7786c0
Author: lxy <[email protected]>
AuthorDate: Sun Jun 7 19:52:54 2026 +0800
[core] Fix BSI reader predicate pruning for Long.MIN_VALUE boundary (#8150)
---
.../bsi/BitSliceIndexBitmapFileIndex.java | 30 ++++++--
.../bsi/BitSliceIndexBitmapFileIndexTest.java | 81 ++++++++++++++++++++++
2 files changed, 105 insertions(+), 6 deletions(-)
diff --git
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java
index 680008a736..f9f0f95cec 100644
---
a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java
+++
b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java
@@ -240,7 +240,11 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
.map(valueMapper)
.map(
value -> {
- if (value < 0) {
+ if (value == Long.MIN_VALUE) {
+ // Writer cannot store Long.MIN_VALUE, so no
+ // row can match it
+ return new
RoaringBitmap32();
+ } else if (value < 0) {
return
negative.eq(Math.abs(value));
} else {
return positive.eq(value);
@@ -262,7 +266,9 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
.map(valueMapper)
.map(
value -> {
- if (value < 0) {
+ if (value ==
Long.MIN_VALUE) {
+ return new
RoaringBitmap32();
+ } else if (value < 0) {
return
negative.eq(Math.abs(value));
} else {
return
positive.eq(value);
@@ -280,7 +286,10 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
return new BitmapIndexResult(
() -> {
Long value = valueMapper.apply(literal);
- if (value < 0) {
+ if (value == Long.MIN_VALUE) {
+ // Nothing is less than Long.MIN_VALUE
+ return new RoaringBitmap32();
+ } else if (value < 0) {
return negative.gt(Math.abs(value));
} else {
return RoaringBitmap32.or(positive.lt(value),
negative.isNotNull());
@@ -293,7 +302,10 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
return new BitmapIndexResult(
() -> {
Long value = valueMapper.apply(literal);
- if (value < 0) {
+ if (value == Long.MIN_VALUE) {
+ // Writer cannot store Long.MIN_VALUE, so no row can match
+ return new RoaringBitmap32();
+ } else if (value < 0) {
return negative.gte(Math.abs(value));
} else {
return RoaringBitmap32.or(positive.lte(value),
negative.isNotNull());
@@ -306,7 +318,10 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
return new BitmapIndexResult(
() -> {
Long value = valueMapper.apply(literal);
- if (value < 0) {
+ if (value == Long.MIN_VALUE) {
+ // Everything is greater than Long.MIN_VALUE (writer cannot store it)
+ return RoaringBitmap32.or(positive.isNotNull(),
negative.isNotNull());
+ } else if (value < 0) {
return RoaringBitmap32.or(
positive.isNotNull(),
negative.lt(Math.abs(value)));
} else {
@@ -320,7 +335,10 @@ public class BitSliceIndexBitmapFileIndex implements
FileIndexer {
return new BitmapIndexResult(
() -> {
Long value = valueMapper.apply(literal);
- if (value < 0) {
+ if (value == Long.MIN_VALUE) {
+ // All non-null rows satisfy x >= Long.MIN_VALUE
+ return RoaringBitmap32.or(positive.isNotNull(),
negative.isNotNull());
+ } else if (value < 0) {
return RoaringBitmap32.or(
positive.isNotNull(),
negative.lte(Math.abs(value)));
} else {
diff --git
a/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java
b/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java
index b55e2e77e1..ad60831ea2 100644
---
a/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java
+++
b/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java
@@ -23,6 +23,7 @@ import org.apache.paimon.fileindex.FileIndexWriter;
import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
import org.apache.paimon.fs.ByteArraySeekableStream;
import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.utils.RoaringBitmap32;
@@ -31,6 +32,7 @@ import org.junit.jupiter.api.Test;
import java.util.Arrays;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
/** test for {@link BitSliceIndexBitmapFileIndex}. */
public class BitSliceIndexBitmapFileIndexTest {
@@ -250,4 +252,83 @@ public class BitSliceIndexBitmapFileIndexTest {
assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
1)).get())
.isEqualTo(RoaringBitmap32.bitmapOf());
}
+
+ @Test
+ public void testReaderPredicatePruningWithLongMinValue() {
+ BigIntType bigIntType = new BigIntType();
+ FieldRef fieldRef = new FieldRef(0, "", bigIntType);
+ BitSliceIndexBitmapFileIndex bsiFileIndex = new
BitSliceIndexBitmapFileIndex(bigIntType);
+ FileIndexWriter writer = bsiFileIndex.createWriter();
+
+ // Use values that include negative numbers but NOT Long.MIN_VALUE itself
+ // (since the writer cannot handle it). This isolates the reader-side bug.
+ // Data: [-100, -1, null, 0, 1, 50]
+ Object[] arr = {-100L, -1L, null, 0L, 1L, 50L};
+
+ for (Object o : arr) {
+ writer.write(o);
+ }
+ byte[] bytes = writer.serializedBytes();
+ ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes);
+ FileIndexReader reader = bsiFileIndex.createReader(stream, 0,
bytes.length);
+
+ // All non-null row ids: {0, 1, 3, 4, 5}
+
+ // x > Long.MIN_VALUE: every int64 value > Long.MIN_VALUE (since no row IS Long.MIN_VALUE),
+ // so result should be ALL non-null rows = {0, 1, 3, 4, 5}
+ RoaringBitmap32 gtResult =
+ ((BitmapIndexResult) reader.visitGreaterThan(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(gtResult)
+ .as("x > Long.MIN_VALUE should return all non-null rows")
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5));
+
+ // x >= Long.MIN_VALUE: same — all non-null rows satisfy this
+ RoaringBitmap32 gteResult =
+ ((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(gteResult)
+ .as("x >= Long.MIN_VALUE should return all non-null rows")
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5));
+
+ // x < Long.MIN_VALUE: no int64 value is less than Long.MIN_VALUE, so result should be
+ // empty
+ RoaringBitmap32 ltResult =
+ ((BitmapIndexResult) reader.visitLessThan(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(ltResult)
+ .as("x < Long.MIN_VALUE should return empty")
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // x <= Long.MIN_VALUE: no row has Long.MIN_VALUE, so result should be empty
+ RoaringBitmap32 lteResult =
+ ((BitmapIndexResult) reader.visitLessOrEqual(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(lteResult)
+ .as("x <= Long.MIN_VALUE should return empty (no row has that value)")
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // x == Long.MIN_VALUE: no row has Long.MIN_VALUE, so result should be empty
+ RoaringBitmap32 eqResult =
+ ((BitmapIndexResult) reader.visitEqual(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(eqResult)
+ .as("x == Long.MIN_VALUE should return empty")
+ .isEqualTo(RoaringBitmap32.bitmapOf());
+
+ // x != Long.MIN_VALUE: all non-null rows (no row has Long.MIN_VALUE)
+ RoaringBitmap32 neqResult =
+ ((BitmapIndexResult) reader.visitNotEqual(fieldRef,
Long.MIN_VALUE)).get();
+ assertThat(neqResult)
+ .as("x != Long.MIN_VALUE should return all non-null rows")
+ .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5));
+ }
+
+ @Test
+ public void testWriterCannotHandleLongMinValue() {
+ BigIntType bigIntType = new BigIntType();
+ BitSliceIndexBitmapFileIndex bsiFileIndex = new
BitSliceIndexBitmapFileIndex(bigIntType);
+ FileIndexWriter writer = bsiFileIndex.createWriter();
+ writer.write(Long.MIN_VALUE);
+
+ assertThatThrownBy(writer::serializedBytes)
+ .isInstanceOf(RuntimeException.class)
+ .hasCauseInstanceOf(IllegalArgumentException.class)
+ .hasRootCauseMessage("values should be non-negative");
+ }
}