This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch parquet-1.15.x
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/parquet-1.15.x by this push:
     new 9d6e19f68 GH-3172: Do not drop blocks with some null values if `DictionaryFilter` is applied for `UserDefinedPredicate` which keeps null values (#3173)
9d6e19f68 is described below

commit 9d6e19f68952d2b77788dc4310b57054f5f3e4e8
Author: Egidijus Bartkus <[email protected]>
AuthorDate: Thu Mar 13 11:24:25 2025 +0200

    GH-3172: Do not drop blocks with some null values if `DictionaryFilter` is applied for `UserDefinedPredicate` which keeps null values (#3173)
---
 .../filter2/dictionarylevel/DictionaryFilter.java      |  4 ++++
 .../filter2/dictionarylevel/DictionaryFilterTest.java  | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index be4455eeb..c6c9f696f 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -529,6 +529,10 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> {
       return BLOCK_MIGHT_MATCH;
     }
 
+    if (udp.acceptsNullValue()) {
+      return BLOCK_MIGHT_MATCH;
+    }
+
     try {
       Set<T> dictSet = expandDictionary(meta);
       if (dictSet == null) {
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 5b9e638d6..f5f414c86 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -106,6 +106,7 @@ public class DictionaryFilterTest {
       + "required binary binary_field; "
       + "required binary single_value_field; "
       + "optional binary optional_single_value_field; "
+      + "optional int32 optional_single_value_int32_field;"
       + "required fixed_len_byte_array(17) fixed_field (DECIMAL(40,4)); "
       + "required int32 int32_field; "
       + "required int64 int64_field; "
@@ -194,6 +195,7 @@ public class DictionaryFilterTest {
       // 10% of the time, leave the field null
       if (index % 10 > 0) {
         group.append("optional_single_value_field", "sharp");
+        group.append("optional_single_value_int32_field", 42);
       }
 
       writer.write(group);
@@ -290,6 +292,7 @@ public class DictionaryFilterTest {
         "binary_field",
         "single_value_field",
         "optional_single_value_field",
+        "optional_single_value_int32_field",
         "int32_field",
         "int64_field",
         "double_field",
@@ -327,6 +330,7 @@ public class DictionaryFilterTest {
         "binary_field",
         "single_value_field",
         "optional_single_value_field",
+        "optional_single_value_int32_field",
         "fixed_field",
         "int32_field",
         "int64_field",
@@ -670,6 +674,20 @@ public class DictionaryFilterTest {
         canDrop(userDefined(intColumn("int32_field"), undroppable), ccmd, 
dictionaries));
   }
 
+  @Test
+  public void testNullAcceptingUdp() throws Exception {
+    InInt32UDP drop42DenyNulls = new InInt32UDP(Sets.newHashSet(205));
+    InInt32UDP drop42AcceptNulls = new InInt32UDP(Sets.newHashSet(null, 205));
+
+    // A column with value 42 and 10% nulls
+    IntColumn intColumnWithNulls = 
intColumn("optional_single_value_int32_field");
+
+    assertTrue("Should drop block", canDrop(userDefined(intColumnWithNulls, 
drop42DenyNulls), ccmd, dictionaries));
+    assertFalse(
+        "Should not drop block for null accepting udp",
+        canDrop(userDefined(intColumnWithNulls, drop42AcceptNulls), ccmd, 
dictionaries));
+  }
+
   @Test
   public void testInverseUdp() throws Exception {
     InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));

Reply via email to