PARQUET-511: Integer overflow when counting values in column.

This commit fixes an issue when the number of entries in a column page
exceeds the maximum value of a 32-bit integer. No exception is thrown
directly, but the definition level is set incorrectly, which leads to a null
value being returned during read.

Author: Michal Gorecki <gorec...@amazon.com>

Closes #321 from goreckm/int-overflow and squashes the following commits:

d224815 [Michal Gorecki] enhancing exception message
7334be2 [Michal Gorecki] PARQUET-511: Integer overflow when counting values in column.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/aced0eb3
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/aced0eb3
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/aced0eb3

Branch: refs/heads/parquet-1.8.x
Commit: aced0eb3770b82d9bde95ac499f902bed372fd39
Parents: 06567fa
Author: Michal Gorecki <gorec...@amazon.com>
Authored: Mon Aug 1 14:38:07 2016 -0700
Committer: Ryan Blue <b...@apache.org>
Committed: Mon Jan 9 16:54:54 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/parquet/column/impl/ColumnReaderImpl.java  | 6 +++---
 .../org/apache/parquet/hadoop/ColumnChunkPageReadStore.java    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
index c53977f..6aafb78 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
@@ -149,7 +149,7 @@ public class ColumnReaderImpl implements ColumnReader {
   private int dictionaryId;
 
   private long endOfPageValueCount;
-  private int readValues = 0;
+  private long readValues = 0;
   private int pageValueCount = 0;
 
   private final PrimitiveConverter converter;
@@ -351,8 +351,8 @@ public class ColumnReaderImpl implements ColumnReader {
       this.dictionary = null;
     }
     this.totalValueCount = pageReader.getTotalValueCount();
-    if (totalValueCount == 0) {
-      throw new ParquetDecodingException("totalValueCount == 0");
+    if (totalValueCount <= 0) {
+      throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
     }
     consume();
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
index ce10e64..2e8f84a 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
@@ -64,7 +64,7 @@ class ColumnChunkPageReadStore implements PageReadStore, DictionaryPageReadStore
       this.decompressor = decompressor;
       this.compressedPages = new LinkedList<DataPage>(compressedPages);
       this.compressedDictionaryPage = compressedDictionaryPage;
-      int count = 0;
+      long count = 0;
       for (DataPage p : compressedPages) {
         count += p.getValueCount();
       }

Reply via email to