[ 
https://issues.apache.org/jira/browse/DRILL-6579?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16542467#comment-16542467
 ] 

ASF GitHub Bot commented on DRILL-6579:
---------------------------------------

sohami closed pull request #1361: DRILL-6579: Added sanity checks to the 
Parquet reader to avoid infini…
URL: https://github.com/apache/drill/pull/1361
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
index 25dfbc8ebc2..f5825e4d990 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
@@ -40,9 +40,9 @@ public int readBatch() throws Exception {
     ColumnReader<?> firstColumnStatus = readState.getFirstColumnReader();
     int currBatchNumRecords = 
readState.batchSizerMgr().getCurrentRecordsPerBatch();
     long recordsToRead = Math.min(currBatchNumRecords, 
readState.getRemainingValuesToRead());
-    int readCount = readRecords(firstColumnStatus, recordsToRead);
-
+    int readCount = recordsToRead > 0 ? readRecords(firstColumnStatus, 
recordsToRead) : 0;
     readState.fillNullVectors(readCount);
+
     return readCount;
   }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
index 7bdc33ef5cf..a015dd7c43a 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.store.parquet.columnreaders;
 
 import com.google.common.base.Stopwatch;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -111,7 +112,7 @@ private int readRecordsInBulk(int recordsToReadInThisPass) 
throws IOException {
 
       // Read the column data
       int readColumns = columnReader.readRecordsInBulk(batchNumRecords);
-      assert readColumns <= batchNumRecords : "Reader cannot return more 
values than requested..";
+      Preconditions.checkState(readColumns <= batchNumRecords, "Reader cannot 
return more values than requested..");
 
       if (!overflowCondition) {
         if (prevReadColumns >= 0 && prevReadColumns != readColumns) {
@@ -181,7 +182,7 @@ private void 
handleColumnOverflow(List<VarLenColumnBatchStats> columnStats, int
     // Register batch overflow data with the record batch sizer manager (if 
any)
     if (builder != null) {
       Map<String, FieldOverflowStateContainer> overflowContainerMap = 
parentReader.batchSizerMgr.getFieldOverflowMap();
-      Map<String, FieldOverflowDefinition> overflowDefMap           = 
builder.build().getRecordOverflowDefinition().getFieldOverflowDefs();
+      Map<String, FieldOverflowDefinition> overflowDefMap = 
builder.build().getRecordOverflowDefinition().getFieldOverflowDefs();
 
       for (Map.Entry<String, FieldOverflowDefinition> entry : 
overflowDefMap.entrySet()) {
         FieldOverflowStateContainer overflowStateContainer = new 
FieldOverflowStateContainer(entry.getValue(), null);
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
index 0e50406d2f1..81b72642b2a 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
@@ -106,11 +107,13 @@ final void set(PageDataInfo pageInfoInput, boolean clear) 
{
     pageInfo.dictionaryValueReader = pageInfoInput.dictionaryValueReader;
     pageInfo.numPageValues = pageInfoInput.numPageValues;
     if (clear) {
-    buffer.clear();
-  }
+      buffer.clear();
+    }
   }
 
   final VarLenColumnBulkEntry getEntry(int valuesToRead) {
+    Preconditions.checkArgument(valuesToRead > 0, "Number of values to read 
[%s] should be greater than zero", valuesToRead);
+
     VarLenColumnBulkEntry entry = null;
 
     // If there is overflow data, then we need to consume it first
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
index 8ba7ac44521..7d7626365fd 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.DictionaryReaderWrapper;
@@ -50,6 +51,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int 
valuesToRead) {
     final DictionaryReaderWrapper valueReader = pageInfo.dictionaryValueReader;
     final int[] valueLengths = entry.getValuesLength();
     final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be 
greater than zero", readBatch);
+
     final byte[] tgtBuff = entry.getInternalDataArray();
     final int tgtLen = tgtBuff.length;
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
index d95050d7857..cec0c7ff634 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
@@ -51,6 +52,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int 
valuesToRead) {
 
     final int[] valueLengths = entry.getValuesLength();
     final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be 
greater than zero", readBatch);
+
     final byte[] tgtBuff = entry.getInternalDataArray();
     final byte[] srcBuff = buffer.array();
     final int srcLen = buffer.remaining();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
index e8dc15fa78a..a6e7077241a 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.PageDataInfo;
@@ -32,19 +33,19 @@
     VarLenColumnBulkInputCallback containerCallback) {
 
     super(buffer, pageInfo, columnPrecInfo, entry, containerCallback);
+    Preconditions.checkArgument(columnPrecInfo.precision >= 0, "Fixed length 
precision [%s] cannot be lower than zero", columnPrecInfo.precision);
   }
 
   /** {@inheritDoc} */
   @Override
   final VarLenColumnBulkEntry getEntry(int valuesToRead) {
-    assert columnPrecInfo.precision >= 0 : "Fixed length precision cannot be 
lower than zero";
-
     load(true); // load new data to process
 
     final int expectedDataLen = columnPrecInfo.precision;
     final int entrySz = 4 + columnPrecInfo.precision;
-    final int maxValues = Math.min(entry.getMaxEntries(), 
(pageInfo.pageDataLen - pageInfo.pageDataOff) / entrySz);
-    final int readBatch = Math.min(maxValues, valuesToRead);
+    final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%d] should be 
greater than zero", readBatch);
+
     final int[] valueLengths = entry.getValuesLength();
     final byte[] tgtBuff = entry.getInternalDataArray();
     final byte[] srcBuff = buffer.array();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
index f7b6dceca88..e33919f0461 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.DictionaryReaderWrapper;
@@ -52,6 +53,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int 
valuesToRead) {
     final DictionaryReaderWrapper valueReader = pageInfo.dictionaryValueReader;
     final int[] valueLengths = entry.getValuesLength();
     final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be 
greater than zero", readBatch);
+
     final byte[] tgtBuff = entry.getInternalDataArray();
     final int tgtLen = tgtBuff.length;
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
index 7ffb27af550..ce39859ad5d 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
@@ -53,6 +54,8 @@ VarLenColumnBulkEntry getEntryBulk(int valuesToRead) {
 
     final int[] valueLengths = entry.getValuesLength();
     final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be 
greater than zero", readBatch);
+
     final byte[] tgtBuff = entry.getInternalDataArray();
     final byte[] srcBuff = buffer.array();
     final int srcLen = buffer.remaining();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
index 98089fd9373..3869113249b 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.PageDataInfo;
@@ -33,19 +34,20 @@
     VarLenColumnBulkInputCallback containerCallback) {
 
     super(buffer, pageInfo, columnPrecInfo, entry, containerCallback);
+    Preconditions.checkArgument(columnPrecInfo.precision >= 0, "Fixed length 
precision cannot be lower than zero");
   }
 
   /** {@inheritDoc} */
   @Override
   final VarLenColumnBulkEntry getEntry(int valuesToRead) {
-    assert columnPrecInfo.precision >= 0 : "Fixed length precision cannot be 
lower than zero";
-
     // TODO - We should not use force reload for sparse columns (values with 
lot of nulls)
     load(true); // load new data to process
 
     final int expectedDataLen = columnPrecInfo.precision;
     final int entrySz = 4 + columnPrecInfo.precision;
     final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+    Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be 
greater than zero", readBatch);
+
     final int[] valueLengths = entry.getValuesLength();
     final byte[] tgtBuff = entry.getInternalDataArray();
     final byte[] srcBuff = buffer.array();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
index cacd5c8495f..6c8891ff959 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet.columnreaders;
 
+import com.google.common.base.Preconditions;
 import java.nio.ByteBuffer;
 
 import 
org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.VarLenColumnBulkInputCallback;
@@ -80,6 +81,8 @@ VarLenColumnBulkEntry getEntry(int valuesToRead) {
     // load some overflow data for processing
     final int maxValues = Math.min(entry.getMaxEntries(), valuesToRead);
     final int numAvailableValues = 
overflowDataCache.load(overflowState.currValueIdx, maxValues);
+    Preconditions.checkState(numAvailableValues > 0, "Number values to read 
[%s] should be greater than zero", numAvailableValues);
+
     final int firstValueDataOffset = getDataBufferStartOffset() + 
adjustDataOffset(overflowState.currValueIdx);
     int totalDataLen = 0;
     int currValueIdx = overflowState.currValueIdx;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Add sanity checks to Parquet Reader 
> ------------------------------------
>
>                 Key: DRILL-6579
>                 URL: https://issues.apache.org/jira/browse/DRILL-6579
>             Project: Apache Drill
>          Issue Type: Improvement
>            Reporter: salim achouche
>            Assignee: salim achouche
>            Priority: Major
>              Labels: ready-to-commit
>             Fix For: 1.14.0
>
>
> Add sanity checks to the Parquet reader to avoid infinite loops.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to