This is an automated email from the ASF dual-hosted git repository.

gershinsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new f2f7c3ec8 Fix ColumnIndexBuilder for notIn predicate (#961)
f2f7c3ec8 is described below

commit f2f7c3ec8b22cbd119689ab321cafb659ccc59ec
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon May 9 05:04:30 2022 -0700

    Fix ColumnIndexBuilder for notIn predicate (#961)
---
 .../column/columnindex/ColumnIndexBuilder.java     |  12 +--
 .../column/columnindex/TestColumnIndexBuilder.java | 100 ++++++++++-----------
 .../filter2/columnindex/TestColumnIndexFilter.java |  10 +--
 3 files changed, 56 insertions(+), 66 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
index 70119037e..7c221efbf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -344,17 +344,7 @@ public abstract class ColumnIndexBuilder {
 
     @Override
     public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotIn<T> notIn) {
-      IntSet indexes = getMatchingIndexes(notIn);
-      return IndexIterator.filter(getPageCount(), pageIndex -> !indexes.contains(pageIndex));
-    }
-
-    private <T extends Comparable<T>> IntSet getMatchingIndexes(SetColumnFilterPredicate<T> in) {
-      IntSet matchingIndexes = new IntOpenHashSet();
-      for (T value : in.getValues()) {
-        Eq<T> eq = new Eq<>(in.getColumn(), value);
-        visit(eq).forEachRemaining((IntConsumer) matchingIndexes::add);
-      }
-      return matchingIndexes;
+      return IndexIterator.all(getPageCount());
     }
 
     @Override
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
index 9c1d4dced..bdaa0bc8b 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
@@ -281,10 +281,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set1 = new HashSet<>();
     set1.add(Binary.fromString("0.0"));
     assertCorrectFiltering(columnIndex, in(col, set1), 1, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6, 
7);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 6);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 
0, 1, 2, 3, 4, 5, 6);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 7);
     assertCorrectFiltering(columnIndex, gt(col, decimalBinary("2348978.45")), 
1);
@@ -333,10 +333,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set2 = new HashSet<>();
     set2.add(decimalBinary("87656273"));
     assertCorrectFiltering(columnIndex, in(col, set2), 2, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 1);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 
0, 1, 2, 3, 5, 6, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 6);
     assertCorrectFiltering(columnIndex, gt(col, decimalBinary("87656273")), 6);
@@ -385,10 +385,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set3 = new HashSet<>();
     set3.add(decimalBinary("1234567890.12"));
     assertCorrectFiltering(columnIndex, in(col, set3), 2, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 1, 2, 3, 4, 6);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 5, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("0.0")), 0, 
1, 2, 3, 4, 5, 6, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 2, 4, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 
decimalBinary("1234567890.12")));
@@ -445,10 +445,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set1 = new HashSet<>();
     set1.add(stringBinary("Marvin"));
     assertCorrectFiltering(columnIndex, in(col, set1), 1, 4, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6, 
7);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 6);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, 
stringBinary("Beeblebrox")), 0, 1, 2, 3, 4, 5, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5, 6);
     assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1, 
5);
@@ -497,10 +497,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set2 = new HashSet<>();
     set2.add( stringBinary("Jeltz"));
     assertCorrectFiltering(columnIndex, in(col, set2), 3, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 6);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, 
stringBinary("Slartibartfast")), 0, 1, 2, 3, 4, 5, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 3, 4, 6);
     assertCorrectFiltering(columnIndex, gt(col, stringBinary("Marvin")), 4, 6);
@@ -549,10 +549,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set3 = new HashSet<>();
     set3.add(stringBinary("Marvin"));
     assertCorrectFiltering(columnIndex, in(col, set3), 3);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 
1, 2, 3, 5, 6, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 4, 7);
     assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1);
@@ -664,10 +664,10 @@ public class TestColumnIndexBuilder {
     Set<Binary> set = new HashSet<>();
     set.add(stringBinary("Dent"));
     assertCorrectFiltering(columnIndex, in(col, set), 2, 3);
-    assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 4, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 2, 3, 4, 5, 6, 
7);
     set.add(null);
     assertCorrectFiltering(columnIndex, in(col, set), 0, 1, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set), new int[0]);
+    assertCorrectFiltering(columnIndex, notIn(col, set), 0, 1, 2, 3, 4, 5, 6, 
7);
     assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 
1, 2, 3, 4, 5, 6, 7);
     assertCorrectFiltering(columnIndex, notEq(col, null), 2, 3, 5, 7);
     assertCorrectFiltering(columnIndex, userDefined(col, 
BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6, 7);
@@ -702,10 +702,10 @@ public class TestColumnIndexBuilder {
     Set<Boolean> set1 = new HashSet<>();
     set1.add(true);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 3, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4);
     assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4);
     assertCorrectFiltering(columnIndex, userDefined(col, 
BooleanIsTrueOrNull.class), 0, 1, 2, 3);
@@ -733,10 +733,10 @@ public class TestColumnIndexBuilder {
     Set<Boolean> set2 = new HashSet<>();
     set2.add(true);
     assertCorrectFiltering(columnIndex, in(col, set2), 4, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 6);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 2, 3, 4, 5, 6);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 1);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6);
     assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4, 5, 6);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
     assertCorrectFiltering(columnIndex, userDefined(col, 
BooleanIsTrueOrNull.class), 0, 2, 3, 4, 5, 6);
@@ -764,10 +764,10 @@ public class TestColumnIndexBuilder {
     Set<Boolean> set3 = new HashSet<>();
     set3.add(true);
     assertCorrectFiltering(columnIndex, in(col, set3), 1, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 2, 3, 5, 6);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 1, 2, 3, 4, 5, 6);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), new int[0]);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6);
     assertCorrectFiltering(columnIndex, notEq(col, true), 0, 2, 3, 4, 5, 6);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
     assertCorrectFiltering(columnIndex, userDefined(col, 
BooleanIsTrueOrNull.class), 0, 1, 2, 3, 4, 5, 6);
@@ -819,10 +819,10 @@ public class TestColumnIndexBuilder {
     set1.add(0.0);
     set1.add(-4.2);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 2, 3, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
     assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 4, 5);
@@ -858,10 +858,10 @@ public class TestColumnIndexBuilder {
     set2.add(3.5);
     set2.add(-346.0);
     assertCorrectFiltering(columnIndex, in(col, set2), 1, 2, 5, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 7, 
8);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), new int[0]);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 0.0), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 2.99999), 7);
@@ -895,10 +895,10 @@ public class TestColumnIndexBuilder {
     Set<Double> set3 = new HashSet<>();
     set3.add(234.6);
     assertCorrectFiltering(columnIndex, in(col, set3), 3, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 6, 7, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2.2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
     assertCorrectFiltering(columnIndex, gt(col, 2.2), 1, 3, 5);
@@ -972,10 +972,10 @@ public class TestColumnIndexBuilder {
     Set<Float> set1 = new HashSet<>();
     set1.add(0.0f);
     assertCorrectFiltering(columnIndex, in(col, set1), 1, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 2, 3, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
     assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 4, 5);
@@ -1009,10 +1009,10 @@ public class TestColumnIndexBuilder {
     Set<Float> set2 = new HashSet<>();
     set2.add(0.0f);
     assertCorrectFiltering(columnIndex, in(col, set2), 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 2.2f), 5, 7);
@@ -1046,10 +1046,10 @@ public class TestColumnIndexBuilder {
     Set<Float> set3 = new HashSet<>();
     set3.add(234.65f);
     assertCorrectFiltering(columnIndex, in(col, set3), 3);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 4, 5, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 5, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2.2f), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
     assertCorrectFiltering(columnIndex, gt(col, 2.2f), 1, 3, 5);
@@ -1123,10 +1123,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set1 = new HashSet<>();
     set1.add(2);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 4, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 3);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 4, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), new int[0]);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
     assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 5);
@@ -1160,10 +1160,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set2 = new HashSet<>();
     set2.add(2);
     assertCorrectFiltering(columnIndex, in(col, set2), 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 
8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 2), 7);
@@ -1198,10 +1198,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set3 = new HashSet<>();
     set3.add(2);
     assertCorrectFiltering(columnIndex, in(col, set3), 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 
8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
     assertCorrectFiltering(columnIndex, gt(col, 2), 1, 3, 5);
@@ -1257,10 +1257,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set1 = new HashSet<>();
     set1.add(2);
     assertCorrectFiltering(columnIndex, in(col, set1), 2, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 3, 5);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 1, 2, 3, 4);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 5);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
     assertCorrectFiltering(columnIndex, gt(col, 2), 0, 1, 4, 5);
@@ -1294,10 +1294,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set2 = new HashSet<>();
     set2.add(2);
     assertCorrectFiltering(columnIndex, in(col, set2), 2);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 4, 5, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 6, 8);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 5, 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 2), 0, 1, 2, 3, 4, 5, 6, 7, 
8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 0xEE), 7);
@@ -1332,10 +1332,10 @@ public class TestColumnIndexBuilder {
     Set<Integer> set3 = new HashSet<>();
     set3.add(0xAB);
     assertCorrectFiltering(columnIndex, in(col, set3), 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 0xFF), 0, 2, 3, 4, 5, 6, 7, 
8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
     assertCorrectFiltering(columnIndex, gt(col, 0xFF));
@@ -1375,10 +1375,10 @@ public class TestColumnIndexBuilder {
     Set<Long> set1 = new HashSet<>();
     set1.add(0l);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 2, 3, 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     set1.add(null);
     assertCorrectFiltering(columnIndex, in(col, set1), 0, 1, 2, 3, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set1), 4);
+    assertCorrectFiltering(columnIndex, notIn(col, set1), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5);
     assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
     assertCorrectFiltering(columnIndex, gt(col, 2l), 0, 1, 5);
@@ -1412,10 +1412,10 @@ public class TestColumnIndexBuilder {
     Set<Long> set2 = new HashSet<>();
     set2.add(-42l);
     assertCorrectFiltering(columnIndex, in(col, set2), 2, 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 3, 4, 6, 7, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set2.add(null);
     assertCorrectFiltering(columnIndex, in(col, set2), 0, 1, 2, 3, 4, 5, 6, 8);
-    assertCorrectFiltering(columnIndex, notIn(col, set2), 7);
+    assertCorrectFiltering(columnIndex, notIn(col, set2), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, -42l), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
     assertCorrectFiltering(columnIndex, gt(col, 2l), 7);
@@ -1450,10 +1450,10 @@ public class TestColumnIndexBuilder {
     Set<Long> set3 = new HashSet<>();
     set3.add(0l);
     assertCorrectFiltering(columnIndex, in(col, set3), 5);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 6, 7, 
8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     set3.add(null);
     assertCorrectFiltering(columnIndex, in(col, set3), 0, 2, 3, 4, 5, 6, 7);
-    assertCorrectFiltering(columnIndex, notIn(col, set3), 1, 8);
+    assertCorrectFiltering(columnIndex, notIn(col, set3), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5, 6, 
7, 8);
     assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
     assertCorrectFiltering(columnIndex, gt(col, 2l), 1, 3, 5);
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
index 47ea5fc5c..27304a14e 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
@@ -372,7 +372,7 @@ public class TestColumnIndexFilter {
     assertRows(calculateRowRanges(FilterCompat.get(in(intColumn("column1"), 
set1)), STORE, paths, TOTAL_ROW_COUNT),
       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
     assertRows(calculateRowRanges(FilterCompat.get(notIn(intColumn("column1"), 
set1)), STORE, paths, TOTAL_ROW_COUNT),
-      1, 2, 3, 4, 5, 6, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
27, 28, 29);
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
 
     Set<Binary> set2 = new HashSet<>();
     set2.add(fromString("Zulu"));
@@ -380,24 +380,24 @@ public class TestColumnIndexFilter {
     assertRows(calculateRowRanges(FilterCompat.get(in(binaryColumn("column2"), 
set2)), STORE, paths, TOTAL_ROW_COUNT),
       0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, 25, 26, 27, 28, 29);
     
assertRows(calculateRowRanges(FilterCompat.get(notIn(binaryColumn("column2"), 
set2)), STORE, paths, TOTAL_ROW_COUNT),
-      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28);
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
 
     Set<Double> set3 = new HashSet<>();
     set3.add(2.03);
     assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
       0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 22);
     
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
-      6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 23, 24, 25, 26, 27, 28, 29);
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
     set3.add(9.98);
     assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25);
     
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
-      6, 7, 8, 9, 23, 24, 25, 26, 27, 28, 29);
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
     set3.add(null);
     assertRows(calculateRowRanges(FilterCompat.get(in(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
     
assertRows(calculateRowRanges(FilterCompat.get(notIn(doubleColumn("column3"), 
set3)), STORE, paths, TOTAL_ROW_COUNT),
-      23, 24, 25);
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
 
     Set<Boolean> set4 = new HashSet<>();
     set4.add(null);

Reply via email to