Fix for a Parquet reader failure on larger files that was brought about by more aggressive memory management.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/749dbe40 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/749dbe40 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/749dbe40 Branch: refs/heads/master Commit: 749dbe40ae0fcec3a4387c3b7454ba3ad0fe7902 Parents: ffbf3ae Author: Jason Altekruse <altekruseja...@gmail.com> Authored: Fri Aug 29 19:45:22 2014 -0700 Committer: Jacques Nadeau <jacq...@apache.org> Committed: Fri Aug 29 23:05:56 2014 -0700 ---------------------------------------------------------------------- .../NullableVarLengthValuesColumn.java | 6 ++---- .../physical/impl/writer/TestParquetWriter.java | 16 ++++++++++++++++ .../exec/store/parquet/ParquetRecordReaderTest.java | 5 +++++ 3 files changed, 23 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/749dbe40/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java index dc29fbd..8bff0b0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableVarLengthValuesColumn.java @@ -88,12 +88,10 @@ public abstract class NullableVarLengthValuesColumn<V extends ValueVector> exten dataTypeLengthInBits = pageReader.pageDataByteArray.getInt((int) pageReader.readyToReadPosInBytes); } // I think this also needs to happen if it is null for the random access - if (! 
variableWidthVector.getMutator().setValueLengthSafe((int) valuesReadInCurrentPass + pageReader.valuesReadyToRead, dataTypeLengthInBits)) { - return true; - } boolean success = setSafe(valuesReadInCurrentPass + pageReader.valuesReadyToRead, pageReader.pageDataByteArray, (int) pageReader.readyToReadPosInBytes + 4, dataTypeLengthInBits); - assert success; + if ( ! success ) + return true; return false; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/749dbe40/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java index a61b40f..268d03d 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java @@ -236,6 +236,22 @@ public class TestParquetWriter extends BaseTestQuery { } } + @Ignore + @Test + public void testParquetRead2() throws Exception { + test("alter system set `store.parquet.use_new_reader` = true"); + List<QueryResultBatch> expected = testSqlWithResults("select s_comment,s_suppkey from dfs.`/tmp/sf100_supplier.parquet`"); + test("alter system set `store.parquet.use_new_reader` = false"); + List<QueryResultBatch> results = testSqlWithResults("select s_comment,s_suppkey from dfs.`/tmp/sf100_supplier.parquet`"); + compareResults(expected, results); + for (QueryResultBatch result : results) { + result.release(); + } + for (QueryResultBatch result : expected) { + result.release(); + } + } + public void runTestAndValidate(String selection, String validationSelection, String inputTable, String outputFile) throws Exception { Path path = new Path("/tmp/" + outputFile); 
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/749dbe40/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java index def6dac..ecdb990 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java @@ -152,6 +152,11 @@ public class ParquetRecordReaderTest extends BaseTestQuery{ } @Test + public void testNullableVarCharMemory() throws Exception { + testFull(QueryType.SQL, "select s_comment,s_suppkey from dfs.`/tmp/sf100_supplier.parquet`", "", 1, 1, 1000, false); + } + + @Test public void testReadVoter() throws Exception { testFull(QueryType.SQL, "select * from dfs.`/tmp/voter.parquet`", "", 1, 1, 1000, false); }