This is an automated email from the ASF dual-hosted git repository. gabor pushed a commit to branch parquet-1.11.x in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
commit 49de5b475c21c241dff9b5020c17401cc6707b87 Author: Gabor Szadovszky <[email protected]> AuthorDate: Fri Jan 10 07:08:01 2020 +0100 PARQUET-1744: Some filters throws ArrayIndexOutOfBoundsException (#732) (cherry picked from commit f0fc29fd3341046f7d46a4a02a7c9ec3d7cd3e46) --- .../internal/column/columnindex/BoundaryOrder.java | 32 +++++++ .../column/columnindex/TestBoundaryOrder.java | 56 +++++++++++ .../filter2/columnindex/TestColumnIndexFilter.java | 106 ++++++++++++++------- 3 files changed, 159 insertions(+), 35 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java index e47b5b3..8a7cee0 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java @@ -84,6 +84,10 @@ public enum BoundaryOrder { @Override OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = 0; int right = length; do { @@ -100,6 +104,10 @@ public enum BoundaryOrder { @Override OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = 0; int right = length; do { @@ -116,6 +124,10 @@ public enum BoundaryOrder { @Override OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = -1; int right = length - 1; do { @@ -132,6 +144,10 @@ public enum BoundaryOrder { @Override OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = -1; int right = length - 1; do { @@ -207,6 +223,10 @@ public enum BoundaryOrder { @Override OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = -1; int right = length - 1; do { @@ -223,6 +243,10 @@ public enum BoundaryOrder { @Override OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = -1; int right = length - 1; do { @@ -239,6 +263,10 @@ public enum BoundaryOrder { @Override OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = 0; int right = length; do { @@ -255,6 +283,10 @@ public enum BoundaryOrder { @Override OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) { int length = comparator.arrayLength(); + if (length == 0) { + // No matching rows if the column index contains null pages only + return IndexIterator.EMPTY; + } int left = 0; int right = length; do { diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java index 3d2a924..97cbf07 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java @@ -183,6 +183,7 @@ public class TestBoundaryOrder { private static final int RAND_COUNT = 2000; private static final ColumnIndexBase<?> RAND_ASCENDING; private static final ColumnIndexBase<?> RAND_DESCENDING; + private static final ColumnIndexBase<?> ALL_NULL_PAGES; private static final SpyValueComparatorBuilder SPY_COMPARATOR_BUILDER = new SpyValueComparatorBuilder(); static { ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE); @@ -233,6 +234,13 @@ public class TestBoundaryOrder { builder.add(stats(it.next(), it.next())); } RAND_DESCENDING = (ColumnIndexBase<?>) builder.build(); + + builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE); + // Add a couple of empty stats so column index will contain null pages only + for (int i = 0; i < 10; ++i) { + builder.add(Statistics.createStats(TYPE)); + } + ALL_NULL_PAGES = (ColumnIndexBase<?>) builder.build(); } private static Statistics<?> stats(int min, int max) { @@ -267,6 +275,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::eq, BoundaryOrder.DESCENDING::eq, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::eq, + BoundaryOrder.ASCENDING::eq, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::eq, + BoundaryOrder.DESCENDING::eq, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); @@ -305,6 +321,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::gt, BoundaryOrder.DESCENDING::gt, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::gt, + BoundaryOrder.ASCENDING::gt, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::gt, + BoundaryOrder.DESCENDING::gt, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); @@ -343,6 +367,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::gtEq, BoundaryOrder.DESCENDING::gtEq, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::gtEq, + BoundaryOrder.ASCENDING::gtEq, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::gtEq, + BoundaryOrder.DESCENDING::gtEq, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); @@ -381,6 +413,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::lt, BoundaryOrder.DESCENDING::lt, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::lt, + BoundaryOrder.ASCENDING::lt, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::lt, + BoundaryOrder.DESCENDING::lt, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); @@ -419,6 +459,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::ltEq, BoundaryOrder.DESCENDING::ltEq, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::ltEq, + BoundaryOrder.ASCENDING::ltEq, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::ltEq, + BoundaryOrder.DESCENDING::ltEq, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); @@ -457,6 +505,14 @@ public class TestBoundaryOrder { BoundaryOrder.UNORDERED::notEq, BoundaryOrder.DESCENDING::notEq, DESCENDING.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ", + BoundaryOrder.UNORDERED::notEq, + BoundaryOrder.ASCENDING::notEq, + ALL_NULL_PAGES.createValueComparator(i)); + validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ", + BoundaryOrder.UNORDERED::notEq, + BoundaryOrder.DESCENDING::notEq, + ALL_NULL_PAGES.createValueComparator(i)); } for (int i = FROM - 1; i <= TO + 1; ++i) { ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i); diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java index ae27214..f37a343 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java @@ -27,6 +27,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.eq; import static org.apache.parquet.filter2.predicate.FilterApi.gt; import static org.apache.parquet.filter2.predicate.FilterApi.gtEq; import static org.apache.parquet.filter2.predicate.FilterApi.intColumn; +import static org.apache.parquet.filter2.predicate.FilterApi.longColumn; import static org.apache.parquet.filter2.predicate.FilterApi.lt; import static org.apache.parquet.filter2.predicate.FilterApi.ltEq; import static org.apache.parquet.filter2.predicate.FilterApi.notEq; @@ -38,10 +39,11 @@ import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.DESCE import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED; import static org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges; import static org.apache.parquet.io.api.Binary.fromString; -import static org.apache.parquet.schema.OriginalType.UTF8; +import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; import static org.apache.parquet.schema.Types.optional; import static org.junit.Assert.assertArrayEquals; @@ -161,55 +163,56 @@ public class TestColumnIndexFilter { /** * <pre> - * row column1 column2 column3 column4 (no column index) - * ------0------ ------0------ ------0------ ------0------ - * 0. 1 Zulu 2.03 - * ------1------ ------1------ ------1------ ------1------ - * 1. 2 Yankee 4.67 - * 2. 3 Xray 3.42 - * 3. 4 Whiskey 8.71 + * row column1 column2 column3 column4 column5 + * (no column index) + * ------0------ ------0------ ------0------ ------0------ ------0------ + * 0. 1 Zulu 2.03 null + * ------1------ ------1------ ------1------ ------1------ ------1------ + * 1. 2 Yankee 4.67 null + * 2. 3 Xray 3.42 null + * 3. 4 Whiskey 8.71 null * ------2------ ------2------ - * 4. 5 Victor 0.56 - * 5. 6 Uniform 4.30 + * 4. 5 Victor 0.56 null + * 5. 6 Uniform 4.30 null * ------2------ ------3------ - * 6. null null null + * 6. null null null null * ------2------ ------4------ - * 7. 7 Tango 3.50 + * 7. 7 Tango 3.50 null * ------3------ - * 8. 7 null 3.14 + * 8. 7 null 3.14 null * ------3------ - * 9. 7 null null + * 9. 7 null null null * ------3------ - * 10. null null 9.99 + * 10. null null 9.99 null * ------4------ - * 11. 8 Sierra 8.78 + * 11. 8 Sierra 8.78 null * ------5------ - * 12. 9 Romeo 9.56 - * 13. 10 Quebec 2.71 + * 12. 9 Romeo 9.56 null + * 13. 10 Quebec 2.71 null * ------4------ - * 14. 11 Papa 5.71 - * 15. 12 Oscar 4.09 + * 14. 11 Papa 5.71 null + * 15. 12 Oscar 4.09 null * ------5------ ------4------ ------6------ - * 16. 13 November null - * 17. 14 Mike null - * 18. 15 Lima 0.36 - * 19. 16 Kilo 2.94 - * 20. 17 Juliett 4.23 + * 16. 13 November null null + * 17. 14 Mike null null + * 18. 15 Lima 0.36 null + * 19. 16 Kilo 2.94 null + * 20. 17 Juliett 4.23 null * ------5------ ------6------ ------7------ - * 21. 18 India null - * 22. 19 Hotel 5.32 + * 21. 18 India null null + * 22. 19 Hotel 5.32 null * ------5------ - * 23. 20 Golf 4.17 - * 24. 21 Foxtrot 7.92 - * 25. 22 Echo 7.95 + * 23. 20 Golf 4.17 null + * 24. 21 Foxtrot 7.92 null + * 25. 22 Echo 7.95 null * ------6------ - * 26. 23 Delta null + * 26. 23 Delta null null * ------6------ - * 27. 24 Charlie null + * 27. 24 Charlie null null * ------8------ - * 28. 25 Bravo null + * 28. 25 Bravo null null * ------7------ - * 29. 26 Alfa null + * 29. 26 Alfa null null * </pre> */ private static final long TOTAL_ROW_COUNT = 30; @@ -231,7 +234,7 @@ public class TestColumnIndexFilter { .addPage(6) .addPage(3) .build(); - private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(UTF8).named("column2"), DESCENDING) + private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(stringType()).named("column2"), DESCENDING) .addPage(0, "Zulu", "Zulu") .addPage(0, "Whiskey", "Yankee") .addPage(1, "Tango", "Victor") @@ -281,6 +284,14 @@ public class TestColumnIndexFilter { .addPage(7) .addPage(2) .build(); + private static final ColumnIndex COLUMN5_CI = new CIBuilder(optional(INT64).named("column5"), ASCENDING) + .addNullPage(1) + .addNullPage(29) + .build(); + private static final OffsetIndex COLUMN5_OI = new OIBuilder() + .addPage(1) + .addPage(29) + .build(); private static final ColumnIndexStore STORE = new ColumnIndexStore() { @Override public ColumnIndex getColumnIndex(ColumnPath column) { @@ -293,6 +304,8 @@ public class TestColumnIndexFilter { return COLUMN3_CI; case "column4": return COLUMN4_CI; + case "column5": + return COLUMN5_CI; default: return null; } @@ -309,6 +322,8 @@ public class TestColumnIndexFilter { return COLUMN3_OI; case "column4": return COLUMN4_OI; + case "column5": + return COLUMN5_OI; default: throw new MissingOffsetIndexException(column); } @@ -461,4 +476,25 @@ public class TestColumnIndexFilter { TOTAL_ROW_COUNT); } + @Test + public void testFilteringWithAllNullPages() { + Set<ColumnPath> paths = paths("column1", "column5"); + + assertAllRows(calculateRowRanges(FilterCompat.get( + notEq(longColumn("column5"), 1234567L)), + STORE, paths, TOTAL_ROW_COUNT), + TOTAL_ROW_COUNT); + assertAllRows(calculateRowRanges(FilterCompat.get( + or(gtEq(intColumn("column1"), 10), + notEq(longColumn("column5"), 1234567L))), + STORE, paths, TOTAL_ROW_COUNT), + TOTAL_ROW_COUNT); + assertRows(calculateRowRanges(FilterCompat.get( + eq(longColumn("column5"), 1234567L)), + STORE, paths, TOTAL_ROW_COUNT)); + assertRows(calculateRowRanges(FilterCompat.get( + and(lt(intColumn("column1"), 20), + gtEq(longColumn("column5"), 1234567L))), + STORE, paths, TOTAL_ROW_COUNT)); + } }
