Repository: parquet-mr
Updated Branches:
  refs/heads/master 1f470253c -> 9c40a7bb3
PARQUET-645: Fix null handling in DictionaryFilter. This fixes how null is handled by `DictionaryFilter` for equals predicates. Null is never in the dictionary and is encoded by the definition level, so the `DictionaryFilter` would never find the value in the dictionary and would incorrectly filter row groups whenever the filter was `col == null`. Author: Ryan Blue <[email protected]> Closes #348 from rdblue/PARQUET-645-fix-null-dictionary-filter and squashes the following commits: ae8dd41 [Ryan Blue] PARQUET-645: Fix null handling in DictionaryFilter. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9c40a7bb Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9c40a7bb Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9c40a7bb Branch: refs/heads/master Commit: 9c40a7bb3c9aca51d17490960c988dfb7b5acebb Parents: 1f47025 Author: Ryan Blue <[email protected]> Authored: Thu Jun 30 09:47:48 2016 -0700 Committer: Julien Le Dem <[email protected]> Committed: Thu Jun 30 09:47:48 2016 -0700 ---------------------------------------------------------------------- .../filter2/dictionarylevel/DictionaryFilter.java | 12 ++++++++++++ .../filter2/dictionarylevel/DictionaryFilterTest.java | 6 ++++++ 2 files changed, 18 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java index 9b03f82..dc1d649 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java +++ 
b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java @@ -123,6 +123,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> { filterColumn.getColumnPath(); + if (value == null) { + // the dictionary contains only non-null values so isn't helpful. this + // could check the column stats, but the StatisticsFilter is responsible + return BLOCK_MIGHT_MATCH; + } + try { Set<T> dictSet = expandDictionary(meta); if (dictSet != null && !dictSet.contains(value)) { @@ -150,6 +156,12 @@ public class DictionaryFilter implements FilterPredicate.Visitor<Boolean> { filterColumn.getColumnPath(); + if (value == null) { + // the dictionary contains only non-null values so isn't helpful. this + // could check the column stats, but the StatisticsFilter is responsible + return BLOCK_MIGHT_MATCH; + } + try { Set<T> dictSet = expandDictionary(meta); if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value)) { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9c40a7bb/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java index 754da68..35b944d 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java @@ -193,6 +193,9 @@ public class DictionaryFilterTest { assertTrue("Should drop block for upper case letters", canDrop(eq(b, Binary.fromString("A")), ccmd, dictionaries)); + + assertFalse("Should not drop block for null", + canDrop(eq(b, null), ccmd, dictionaries)); } @Test @@ -211,6 +214,9 @@ public class DictionaryFilterTest { 
assertFalse("Should not drop block with a known value", canDrop(notEq(b, Binary.fromString("B")), ccmd, dictionaries)); + + assertFalse("Should not drop block for null", + canDrop(notEq(b, null), ccmd, dictionaries)); } @Test
