Drill-418: Fixes for Parquet nullable reader bug.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/7ad6de69 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/7ad6de69 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/7ad6de69 Branch: refs/heads/master Commit: 7ad6de699303f5ba60f0d4e4034b8bbd7edd15c4 Parents: e753ade Author: Jason Altekruse <[email protected]> Authored: Mon Mar 17 09:12:19 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Mon Mar 17 09:12:19 2014 -0700 ---------------------------------------------------------------------- .../drill/exec/store/parquet/ColumnReader.java | 2 +- .../exec/store/parquet/NullableColumnReader.java | 4 ++-- .../store/parquet/ParquetRecordReaderTest.java | 17 ++++++++++++----- .../exec/store/parquet/TestFileGenerator.java | 12 ++++++++++++ pom.xml | 4 ++-- 5 files changed, 29 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java index 2cc126c..d5c88ef 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java @@ -46,7 +46,7 @@ abstract class ColumnReader { final boolean isFixedLength; // counter for the total number of values read from one or more pages - // when a batch is filled all of these values should be the same for each column + // when a batch is filled all of these values should be the same for all of the columns int totalValuesRead; // counter for the values that have been read in this pass (a single call to the next() method) http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java index 4c33aeb..9be8266 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java @@ -63,7 +63,7 @@ abstract class NullableColumnReader extends ColumnReader{ long runStart = pageReadStatus.readPosInBytes; int runLength = 0; int currentDefinitionLevel = 0; - int currentValueIndexInVector = totalValuesRead; + int currentValueIndexInVector = (int) recordsReadInThisIteration; boolean lastValueWasNull = true; int definitionLevelsRead; // loop to find the longest run of defined values available, can be preceded by several nulls @@ -77,7 +77,7 @@ abstract class NullableColumnReader extends ColumnReader{ } while(currentValueIndexInVector - totalValuesRead < recordsToReadInThisPass && currentValueIndexInVector < valueVecHolder.getValueVector().getValueCapacity() - && definitionLevelsRead < pageReadStatus.currentPage.getValueCount()){ + && pageReadStatus.valuesRead + definitionLevelsRead < pageReadStatus.currentPage.getValueCount()){ currentDefinitionLevel = pageReadStatus.definitionLevels.readInteger(); definitionLevelsRead++; if ( currentDefinitionLevel < columnDescriptor.getMaxDefinitionLevel()){ http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java index 2b043aa..f6a8aa4 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java @@ -78,7 +78,6 @@ public class ParquetRecordReaderTest { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetRecordReaderTest.class); static boolean VERBOSE_DEBUG = false; - private boolean checkValues = true; static final int numberRowGroups = 1; static final int recordsPerRowGroup = 300; @@ -163,8 +162,6 @@ public class ParquetRecordReaderTest { RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet(); - checkValues = false; - DrillConfig config = DrillConfig.create(); try(Drillbit bit1 = new Drillbit(config, serviceSet); DrillClient client = new DrillClient(config, serviceSet.getCoordinator());){ @@ -203,8 +200,6 @@ public class ParquetRecordReaderTest { DrillConfig config = DrillConfig.create(); - checkValues = false; - try(DrillClient client = new DrillClient(config);){ client.connect(); RecordBatchLoader batchLoader = new RecordBatchLoader(client.getAllocator()); @@ -327,6 +322,18 @@ public class ParquetRecordReaderTest { "/tmp/test.parquet", i, props); } + + @Ignore + @Test + public void testReadBug_Drill_418() throws Exception { + HashMap<String, FieldInfo> fields = new HashMap<>(); + ParquetTestProperties props = new ParquetTestProperties(5, 300000, DEFAULT_BYTES_PER_PAGE, fields); + TestFileGenerator.populateDrill_418_fields(props); + String readEntries = "\"/tmp/customer.plain.parquet\""; + testParquetFullEngineEventBased(false, false, "/parquet/parquet_scan_screen_read_entry_replace.json", readEntries, + "unused, no file is generated", 1, props, true); + } + // requires binary file generated by pig from TPCH data, also have to disable assert where data is coming in @Ignore @Test http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java index 689b168..d8892dc 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java @@ -57,6 +57,18 @@ public class TestFileGenerator { static final Object[] binVals = { varLen1, varLen2, varLen3 }; static final Object[] bin2Vals = { varLen3, varLen2, varLen1 }; + static void populateDrill_418_fields(ParquetTestProperties props){ + + props.fields.put("cust_key", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("nation_key", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("acctbal", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("name", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("address", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("phone", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("mktsegment", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + props.fields.put("comment_col", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); + } + static void populateFieldInfoMap(ParquetTestProperties props){ props.fields.put("integer", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props)); props.fields.put("bigInt", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props)); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 37fcbf5..e0b96fa 100644 --- a/pom.xml +++ b/pom.xml @@ -115,9 +115,9 @@ <exclude>**/.buildpath</exclude> <exclude>**/*.proto</exclude> <exclude>**/*.fmpp</exclude> - <exclude>**/*.tdd</exclude> - <exclude>**/*.iml</exclude> <exclude>**/target/**</exclude> + <exclude>**/*.iml</exclude> + <exclude>**/*.tdd</exclude> </excludes> </configuration> </plugin>
