This is an automated email from the ASF dual-hosted git repository.
gershinsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new f2f7c3ec8 Fix ColumnIndexBuilder for notIn predicate (#961)
f2f7c3ec8 is described below
commit f2f7c3ec8b22cbd119689ab321cafb659ccc59ec
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon May 9 05:04:30 2022 -0700
Fix ColumnIndexBuilder for notIn predicate (#961)
---
.../column/columnindex/ColumnIndexBuilder.java | 12 +--
.../column/columnindex/TestColumnIndexBuilder.java | 100 ++++++++++-----------
.../filter2/columnindex/TestColumnIndexFilter.java | 10 +--
3 files changed, 56 insertions(+), 66 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
index 70119037e..7c221efbf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -344,17 +344,7 @@ public abstract class ColumnIndexBuilder {
@Override
public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotIn<T>
notIn) {
- IntSet indexes = getMatchingIndexes(notIn);
- return IndexIterator.filter(getPageCount(), pageIndex ->
!indexes.contains(pageIndex));
- }
-
- private <T extends Comparable<T>> IntSet
getMatchingIndexes(SetColumnFilterPredicate<T> in) {
- IntSet matchingIndexes = new IntOpenHashSet();
- for (T value : in.getValues()) {
- Eq<T> eq = new Eq<>(in.getColumn(), value);
- visit(eq).forEachRemaining((IntConsumer) matchingIndexes::add);
- }
- return matchingIndexes;
+ return IndexIterator.all(getPageCount());
}
@Override
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
index 9c1d4dced..bdaa0bc8b 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
@@ -281,10 +281,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set1 = new HashSet<>();
set1.add(Binary.fromString("0.0"));
assertCorrectFiltering(columnIndex, in(col, set1), 1, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6,
7);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 6);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")),
0, 1, 2, 3, 4, 5, 6);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 7);
assertCorrectFiltering(columnIndex, gt(col, decimalBinary("2348978.45")),
1);
@@ -333,10 +333,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set2 = new HashSet<>();
set2.add(decimalBinary("87656273"));
assertCorrectFiltering(columnIndex, in(col, set2), 2, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 1);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")),
0, 1, 2, 3, 5, 6, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 6);
assertCorrectFiltering(columnIndex, gt(col, decimalBinary("87656273")), 6);
@@ -385,10 +385,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set3 = new HashSet<>();
set3.add(decimalBinary("1234567890.12"));
assertCorrectFiltering(columnIndex, in(col, set3), 2, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 3, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 1, 2, 3, 4, 6);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 5, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("0.0")), 0,
1, 2, 3, 4, 5, 6, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 2, 4, 5, 7);
assertCorrectFiltering(columnIndex, gt(col,
decimalBinary("1234567890.12")));
@@ -445,10 +445,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set1 = new HashSet<>();
set1.add(stringBinary("Marvin"));
assertCorrectFiltering(columnIndex, in(col, set1), 1, 4, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6,
7);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 6);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col,
stringBinary("Beeblebrox")), 0, 1, 2, 3, 4, 5, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5, 6);
assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1,
5);
@@ -497,10 +497,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set2 = new HashSet<>();
set2.add( stringBinary("Jeltz"));
assertCorrectFiltering(columnIndex, in(col, set2), 3, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 6);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col,
stringBinary("Slartibartfast")), 0, 1, 2, 3, 4, 5, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 3, 4, 6);
assertCorrectFiltering(columnIndex, gt(col, stringBinary("Marvin")), 4, 6);
@@ -549,10 +549,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set3 = new HashSet<>();
set3.add(stringBinary("Marvin"));
assertCorrectFiltering(columnIndex, in(col, set3), 3);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0,
1, 2, 3, 5, 6, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 4, 7);
assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1);
@@ -664,10 +664,10 @@ public class TestColumnIndexBuilder {
Set<Binary> set = new HashSet<>();
set.add(stringBinary("Dent"));
assertCorrectFiltering(columnIndex, in(col, set), 2, 3);
- assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 4, 5, 6, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 2, 3, 4, 5, 6,
7);
set.add(null);
assertCorrectFiltering(columnIndex, in(col, set), 0, 1, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set), new int[0]);
+ assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 2, 3, 4, 5, 6,
7);
assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0,
1, 2, 3, 4, 5, 6, 7);
assertCorrectFiltering(columnIndex, notEq(col, null), 2, 3, 5, 7);
assertCorrectFiltering(columnIndex, userDefined(col,
BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6, 7);
@@ -702,10 +702,10 @@ public class TestColumnIndexBuilder {
Set<Boolean> set1 = new HashSet<>();
set1.add(true);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 3, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4);
assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4);
assertCorrectFiltering(columnIndex, userDefined(col,
BooleanIsTrueOrNull.class), 0, 1, 2, 3);
@@ -733,10 +733,10 @@ public class TestColumnIndexBuilder {
Set<Boolean> set2 = new HashSet<>();
set2.add(true);
assertCorrectFiltering(columnIndex, in(col, set2), 4, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 6);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 2, 3, 4, 5, 6);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 1);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6);
assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4, 5, 6);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
assertCorrectFiltering(columnIndex, userDefined(col,
BooleanIsTrueOrNull.class), 0, 2, 3, 4, 5, 6);
@@ -764,10 +764,10 @@ public class TestColumnIndexBuilder {
Set<Boolean> set3 = new HashSet<>();
set3.add(true);
assertCorrectFiltering(columnIndex, in(col, set3), 1, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 2, 3, 5, 6);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 1, 2, 3, 4, 5, 6);
- assertCorrectFiltering(columnIndex, notIn(col, set3), new int[0]);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6);
assertCorrectFiltering(columnIndex, notEq(col, true), 0, 2, 3, 4, 5, 6);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
assertCorrectFiltering(columnIndex, userDefined(col,
BooleanIsTrueOrNull.class), 0, 1, 2, 3, 4, 5, 6);
@@ -819,10 +819,10 @@ public class TestColumnIndexBuilder {
set1.add(0.0);
set1.add(-4.2);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 2, 3, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 4, 5);
@@ -858,10 +858,10 @@ public class TestColumnIndexBuilder {
set2.add(3.5);
set2.add(-346.0);
assertCorrectFiltering(columnIndex, in(col, set2), 1, 2, 5, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 2, 3, 4, 6, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 7,
8);
- assertCorrectFiltering(columnIndex, notIn(col, set2), new int[0]);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 0.0), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 2.99999), 7);
@@ -895,10 +895,10 @@ public class TestColumnIndexBuilder {
Set<Double> set3 = new HashSet<>();
set3.add(234.6);
assertCorrectFiltering(columnIndex, in(col, set3), 3, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 6, 7, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 3, 5);
@@ -972,10 +972,10 @@ public class TestColumnIndexBuilder {
Set<Float> set1 = new HashSet<>();
set1.add(0.0f);
assertCorrectFiltering(columnIndex, in(col, set1), 1, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 4, 5);
@@ -1009,10 +1009,10 @@ public class TestColumnIndexBuilder {
Set<Float> set2 = new HashSet<>();
set2.add(0.0f);
assertCorrectFiltering(columnIndex, in(col, set2), 5);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 2.2f), 5, 7);
@@ -1046,10 +1046,10 @@ public class TestColumnIndexBuilder {
Set<Float> set3 = new HashSet<>();
set3.add(234.65f);
assertCorrectFiltering(columnIndex, in(col, set3), 3);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 5, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 3, 5);
@@ -1123,10 +1123,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set1 = new HashSet<>();
set1.add(2);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 4, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 3);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), new int[0]);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 5);
@@ -1160,10 +1160,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set2 = new HashSet<>();
set2.add(2);
assertCorrectFiltering(columnIndex, in(col, set2), 5);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7,
8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 2), 7);
@@ -1198,10 +1198,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set3 = new HashSet<>();
set3.add(2);
assertCorrectFiltering(columnIndex, in(col, set3), 5);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7,
8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 2), 1, 3, 5);
@@ -1257,10 +1257,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set1 = new HashSet<>();
set1.add(2);
assertCorrectFiltering(columnIndex, in(col, set1), 2, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 3, 5);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 4);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 5);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 4, 5);
@@ -1294,10 +1294,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set2 = new HashSet<>();
set2.add(2);
assertCorrectFiltering(columnIndex, in(col, set2), 2);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 4, 5, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 6, 8);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 5, 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7,
8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 0xEE), 7);
@@ -1332,10 +1332,10 @@ public class TestColumnIndexBuilder {
Set<Integer> set3 = new HashSet<>();
set3.add(0xAB);
assertCorrectFiltering(columnIndex, in(col, set3), 5);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 0xFF), 0, 2, 3, 4, 5, 6, 7,
8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 0xFF));
@@ -1375,10 +1375,10 @@ public class TestColumnIndexBuilder {
Set<Long> set1 = new HashSet<>();
set1.add(0l);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 2, 3, 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
set1.add(null);
assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+ assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5);
assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
assertCorrectFiltering(columnIndex, gt(col, 2l), 0, 1, 5);
@@ -1412,10 +1412,10 @@ public class TestColumnIndexBuilder {
Set<Long> set2 = new HashSet<>();
set2.add(-42l);
assertCorrectFiltering(columnIndex, in(col, set2), 2, 5);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 4, 6, 7, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set2.add(null);
assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
- assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+ assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, -42l), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
assertCorrectFiltering(columnIndex, gt(col, 2l), 7);
@@ -1450,10 +1450,10 @@ public class TestColumnIndexBuilder {
Set<Long> set3 = new HashSet<>();
set3.add(0l);
assertCorrectFiltering(columnIndex, in(col, set3), 5);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7,
8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
set3.add(null);
assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
- assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+ assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5, 6,
7, 8);
assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
assertCorrectFiltering(columnIndex, gt(col, 2l), 1, 3, 5);
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
index 47ea5fc5c..27304a14e 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
@@ -372,7 +372,7 @@ public class TestColumnIndexFilter {
assertRows(calculateRowRanges(FilterCompat.get(in(intColumn("column1"),
set1)), STORE, paths, TOTAL_ROW_COUNT),
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
assertRows(calculateRowRanges(FilterCompat.get(notIn(intColumn("column1"),
set1)), STORE, paths, TOTAL_ROW_COUNT),
- 1, 2, 3, 4, 5, 6, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29);
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
Set<Binary> set2 = new HashSet<>();
set2.add(fromString("Zulu"));
@@ -380,24 +380,24 @@ public class TestColumnIndexFilter {
assertRows(calculateRowRanges(FilterCompat.get(in(binaryColumn("column2"),
set2)), STORE, paths, TOTAL_ROW_COUNT),
0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29);
assertRows(calculateRowRanges(FilterCompat.get(notIn(binaryColumn("column2"),
set2)), STORE, paths, TOTAL_ROW_COUNT),
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28);
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
Set<Double> set3 = new HashSet<>();
set3.add(2.03);
assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 22);
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
- 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 23, 24, 25, 26, 27, 28, 29);
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
set3.add(9.98);
assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25);
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
- 6, 7, 8, 9, 23, 24, 25, 26, 27, 28, 29);
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
set3.add(null);
assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"),
set3)), STORE, paths, TOTAL_ROW_COUNT),
- 23, 24, 25);
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
Set<Boolean> set4 = new HashSet<>();
set4.add(null);