Repository: parquet-mr Updated Branches: refs/heads/master 60b6d5aa3 -> 5c85b8dda
PARQUET-511: Integer overflow when counting values in column. This commit fixes an issue when the number of entries in a column page exceeds the maximum value of a 32-bit integer (Integer.MAX_VALUE). No exception is thrown directly, but the definition level is set incorrectly, leading to a null value being returned during read. Author: Michal Gorecki <[email protected]> Closes #321 from goreckm/int-overflow and squashes the following commits: d224815 [Michal Gorecki] enhancing exception message 7334be2 [Michal Gorecki] PARQUET-511: Integer overflow when counting values in column. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/5c85b8dd Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/5c85b8dd Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/5c85b8dd Branch: refs/heads/master Commit: 5c85b8dda5f3047732a17b818256b9289274d071 Parents: 60b6d5a Author: Michal Gorecki <[email protected]> Authored: Mon Aug 1 14:38:07 2016 -0700 Committer: Julien Le Dem <[email protected]> Committed: Mon Aug 1 14:38:07 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/parquet/column/impl/ColumnReaderImpl.java | 6 +++--- .../org/apache/parquet/hadoop/ColumnChunkPageReadStore.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/5c85b8dd/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java index 8c2a4bf..3fc327e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java +++ 
b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java @@ -150,7 +150,7 @@ public class ColumnReaderImpl implements ColumnReader { private int dictionaryId; private long endOfPageValueCount; - private int readValues = 0; + private long readValues = 0; private int pageValueCount = 0; private final PrimitiveConverter converter; @@ -352,8 +352,8 @@ public class ColumnReaderImpl implements ColumnReader { this.dictionary = null; } this.totalValueCount = pageReader.getTotalValueCount(); - if (totalValueCount == 0) { - throw new ParquetDecodingException("totalValueCount == 0"); + if (totalValueCount <= 0) { + throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0"); } consume(); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/5c85b8dd/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java index b0d0d30..f428e85 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java @@ -64,7 +64,7 @@ class ColumnChunkPageReadStore implements PageReadStore, DictionaryPageReadStore this.decompressor = decompressor; this.compressedPages = new LinkedList<DataPage>(compressedPages); this.compressedDictionaryPage = compressedDictionaryPage; - int count = 0; + long count = 0; for (DataPage p : compressedPages) { count += p.getValueCount(); }
