DRILL-4349: parquet reader returns wrong results when reading a nullable column that starts with a large number of nulls (>30k)
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/88edebd5
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/88edebd5
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/88edebd5

Branch: refs/heads/1.5.0
Commit: 88edebd5ae76240056c7cada6a79685c0e16cdb9
Parents: 7502584
Author: adeneche <[email protected]>
Authored: Wed Feb 3 15:42:22 2016 -0800
Committer: Jason Altekruse <[email protected]>
Committed: Tue Feb 9 08:03:50 2016 -0800

----------------------------------------------------------------------
 .../parquet/columnreaders/NullableColumnReader.java |  2 +-
 .../exec/store/parquet2/TestDrillParquetReader.java | 13 +++++++++++++
 .../src/test/resources/parquet2/4349.csv.gz         | Bin 0 -> 202 bytes
 3 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/88edebd5/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
index 4e52b70..2929eb2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
@@ -154,10 +154,10 @@ abstract class NullableColumnReader<V extends ValueVector> extends ColumnReader<
         writeCount += runLength;
         valuesReadInCurrentPass += runLength;
+        pageReader.readPosInBytes = readStartInBytes + readLength;
       }

       pageReader.valuesRead += recordsReadInThisIteration;
-      pageReader.readPosInBytes = readStartInBytes + readLength;

       totalValuesRead += runLength + nullRunLength;

http://git-wip-us.apache.org/repos/asf/drill/blob/88edebd5/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
index 05ca7fc..b18fd9d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
@@ -71,4 +71,17 @@ public class TestDrillParquetReader extends BaseTestQuery {
   public void testOptionalDecimal38() throws Exception {
     testColumn("d38_opt");
   }
+
+  @Test
+  public void test4349() throws Exception {
+    // start by creating a parquet file from the input csv file
+    runSQL("CREATE TABLE dfs_test.tmp.`4349` AS SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz`");
+
+    // querying the parquet file should return the same results found in the csv file
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("SELECT * FROM dfs_test.tmp.`4349` WHERE id = 'b'")
+      .sqlBaselineQuery("SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz` WHERE columns[0] = 'b'")
+      .go();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/88edebd5/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet2/4349.csv.gz b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
new file mode 100644
index 0000000..0729b0c
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz differ
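
To reproduce the underlying bug locally, an input with the same shape as the new test's
resource can also be generated instead of reusing the checked-in 4349.csv.gz. The sketch
below is a hypothetical helper, not part of this commit (its class name, row count, and
output path are illustrative): it writes a gzipped two-column CSV whose second column is
empty for well over 30,000 leading rows, so that the test's
CAST(NULLIF(columns[1], '') AS DOUBLE) yields the long leading run of NULLs described in
the subject line.

  import java.io.FileOutputStream;
  import java.io.OutputStreamWriter;
  import java.io.Writer;
  import java.nio.charset.StandardCharsets;
  import java.util.zip.GZIPOutputStream;

  // Hypothetical generator (not part of this commit): writes a gzipped CSV whose second
  // column is empty for >30k leading rows, producing a long leading run of NULLs after
  // CAST(NULLIF(columns[1], '') AS DOUBLE).
  public class LeadingNullsCsvGenerator {
    public static void main(String[] args) throws Exception {
      try (Writer out = new OutputStreamWriter(
          new GZIPOutputStream(new FileOutputStream("leading_nulls.csv.gz")),
          StandardCharsets.UTF_8)) {
        for (int i = 0; i < 40_000; i++) {
          out.write("a,\n");      // empty second column -> NULL after the CAST(NULLIF(...))
        }
        out.write("b,1.5\n");     // first non-null value only appears after the >30k null run
      }
    }
  }

With such a file in place of the resource, the CTAS plus the pair of queries exercised by
test4349() above would be expected to disagree on a build without this patch, and to match
once pageReader.readPosInBytes is advanced only after a run of non-null values has actually
been read, as the NullableColumnReader change above does.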
