This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new b6d54cccc GH-3172: Do not drop blocks with some null values if
`DictionaryFilter` is applied for `UserDefinedPredicate` which keeps null
values (#3173)
b6d54cccc is described below
commit b6d54cccc35f54813fa1ea97887d1b10b3757091
Author: Egidijus Bartkus <[email protected]>
AuthorDate: Thu Mar 13 11:24:25 2025 +0200
GH-3172: Do not drop blocks with some null values if `DictionaryFilter` is
applied for `UserDefinedPredicate` which keeps null values (#3173)
---
.../filter2/dictionarylevel/DictionaryFilter.java | 4 ++++
.../filter2/dictionarylevel/DictionaryFilterTest.java | 18 ++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
index be4455eeb..c6c9f696f 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java
@@ -529,6 +529,10 @@ public class DictionaryFilter implements
FilterPredicate.Visitor<Boolean> {
return BLOCK_MIGHT_MATCH;
}
+ if (udp.acceptsNullValue()) {
+ return BLOCK_MIGHT_MATCH;
+ }
+
try {
Set<T> dictSet = expandDictionary(meta);
if (dictSet == null) {
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
index 5b9e638d6..f5f414c86 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java
@@ -106,6 +106,7 @@ public class DictionaryFilterTest {
+ "required binary binary_field; "
+ "required binary single_value_field; "
+ "optional binary optional_single_value_field; "
+ + "optional int32 optional_single_value_int32_field;"
+ "required fixed_len_byte_array(17) fixed_field (DECIMAL(40,4)); "
+ "required int32 int32_field; "
+ "required int64 int64_field; "
@@ -194,6 +195,7 @@ public class DictionaryFilterTest {
// 10% of the time, leave the field null
if (index % 10 > 0) {
group.append("optional_single_value_field", "sharp");
+ group.append("optional_single_value_int32_field", 42);
}
writer.write(group);
@@ -290,6 +292,7 @@ public class DictionaryFilterTest {
"binary_field",
"single_value_field",
"optional_single_value_field",
+ "optional_single_value_int32_field",
"int32_field",
"int64_field",
"double_field",
@@ -327,6 +330,7 @@ public class DictionaryFilterTest {
"binary_field",
"single_value_field",
"optional_single_value_field",
+ "optional_single_value_int32_field",
"fixed_field",
"int32_field",
"int64_field",
@@ -670,6 +674,20 @@ public class DictionaryFilterTest {
canDrop(userDefined(intColumn("int32_field"), undroppable), ccmd,
dictionaries));
}
+ @Test
+ public void testNullAcceptingUdp() throws Exception {
+ InInt32UDP drop42DenyNulls = new InInt32UDP(Sets.newHashSet(205));
+ InInt32UDP drop42AcceptNulls = new InInt32UDP(Sets.newHashSet(null, 205));
+
+ // A column with value 42 and 10% nulls
+ IntColumn intColumnWithNulls =
intColumn("optional_single_value_int32_field");
+
+ assertTrue("Should drop block", canDrop(userDefined(intColumnWithNulls,
drop42DenyNulls), ccmd, dictionaries));
+ assertFalse(
+ "Should not drop block for null accepting udp",
+ canDrop(userDefined(intColumnWithNulls, drop42AcceptNulls), ccmd,
dictionaries));
+ }
+
@Test
public void testInverseUdp() throws Exception {
InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));