sohami closed pull request #1361: DRILL-6579: Added sanity checks to the Parquet reader to avoid infini…
URL: https://github.com/apache/drill/pull/1361
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
index 25dfbc8ebc2..f5825e4d990 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/BatchReader.java
@@ -40,9 +40,9 @@ public int readBatch() throws Exception {
ColumnReader<?> firstColumnStatus = readState.getFirstColumnReader();
int currBatchNumRecords = readState.batchSizerMgr().getCurrentRecordsPerBatch();
long recordsToRead = Math.min(currBatchNumRecords, readState.getRemainingValuesToRead());
- int readCount = readRecords(firstColumnStatus, recordsToRead);
-
+ int readCount = recordsToRead > 0 ? readRecords(firstColumnStatus, recordsToRead) : 0;
readState.fillNullVectors(readCount);
+
return readCount;
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
index 7bdc33ef5cf..a015dd7c43a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBinaryReader.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.store.parquet.columnreaders;
import com.google.common.base.Stopwatch;
+import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
@@ -111,7 +112,7 @@ private int readRecordsInBulk(int recordsToReadInThisPass) throws IOException {
// Read the column data
int readColumns = columnReader.readRecordsInBulk(batchNumRecords);
- assert readColumns <= batchNumRecords : "Reader cannot return more values than requested..";
+ Preconditions.checkState(readColumns <= batchNumRecords, "Reader cannot return more values than requested..");
if (!overflowCondition) {
if (prevReadColumns >= 0 && prevReadColumns != readColumns) {
@@ -181,7 +182,7 @@ private void handleColumnOverflow(List<VarLenColumnBatchStats> columnStats, int
// Register batch overflow data with the record batch sizer manager (if any)
if (builder != null) {
Map<String, FieldOverflowStateContainer> overflowContainerMap = parentReader.batchSizerMgr.getFieldOverflowMap();
- Map<String, FieldOverflowDefinition> overflowDefMap = builder.build().getRecordOverflowDefinition().getFieldOverflowDefs();
+ Map<String, FieldOverflowDefinition> overflowDefMap = builder.build().getRecordOverflowDefinition().getFieldOverflowDefs();
for (Map.Entry<String, FieldOverflowDefinition> entry : overflowDefMap.entrySet()) {
FieldOverflowStateContainer overflowStateContainer = new FieldOverflowStateContainer(entry.getValue(), null);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
index 0e50406d2f1..81b72642b2a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenBulkPageReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
@@ -106,11 +107,13 @@ final void set(PageDataInfo pageInfoInput, boolean clear) {
pageInfo.dictionaryValueReader = pageInfoInput.dictionaryValueReader;
pageInfo.numPageValues = pageInfoInput.numPageValues;
if (clear) {
- buffer.clear();
- }
+ buffer.clear();
+ }
}
final VarLenColumnBulkEntry getEntry(int valuesToRead) {
+ Preconditions.checkArgument(valuesToRead > 0, "Number of values to read [%s] should be greater than zero", valuesToRead);
+
VarLenColumnBulkEntry entry = null;
// If there is overflow data, then we need to consume it first
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
index 8ba7ac44521..7d7626365fd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryDictionaryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.DictionaryReaderWrapper;
@@ -50,6 +51,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int valuesToRead) {
final DictionaryReaderWrapper valueReader = pageInfo.dictionaryValueReader;
final int[] valueLengths = entry.getValuesLength();
final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);
+
final byte[] tgtBuff = entry.getInternalDataArray();
final int tgtLen = tgtBuff.length;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
index d95050d7857..cec0c7ff634 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenEntryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
@@ -51,6 +52,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int valuesToRead) {
final int[] valueLengths = entry.getValuesLength();
final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);
+
final byte[] tgtBuff = entry.getInternalDataArray();
final byte[] srcBuff = buffer.array();
final int srcLen = buffer.remaining();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
index e8dc15fa78a..a6e7077241a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenFixedEntryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.PageDataInfo;
@@ -32,19 +33,19 @@
VarLenColumnBulkInputCallback containerCallback) {
super(buffer, pageInfo, columnPrecInfo, entry, containerCallback);
+ Preconditions.checkArgument(columnPrecInfo.precision >= 0, "Fixed length precision [%s] cannot be lower than zero", columnPrecInfo.precision);
}
/** {@inheritDoc} */
@Override
final VarLenColumnBulkEntry getEntry(int valuesToRead) {
- assert columnPrecInfo.precision >= 0 : "Fixed length precision cannot be lower than zero";
-
load(true); // load new data to process
final int expectedDataLen = columnPrecInfo.precision;
final int entrySz = 4 + columnPrecInfo.precision;
- final int maxValues = Math.min(entry.getMaxEntries(), (pageInfo.pageDataLen - pageInfo.pageDataOff) / entrySz);
- final int readBatch = Math.min(maxValues, valuesToRead);
+ final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%d] should be greater than zero", readBatch);
+
final int[] valueLengths = entry.getValuesLength();
final byte[] tgtBuff = entry.getInternalDataArray();
final byte[] srcBuff = buffer.array();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
index f7b6dceca88..e33919f0461 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableDictionaryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.DictionaryReaderWrapper;
@@ -52,6 +53,8 @@ private final VarLenColumnBulkEntry getEntryBulk(int valuesToRead) {
final DictionaryReaderWrapper valueReader = pageInfo.dictionaryValueReader;
final int[] valueLengths = entry.getValuesLength();
final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);
+
final byte[] tgtBuff = entry.getInternalDataArray();
final int tgtLen = tgtBuff.length;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
index 7ffb27af550..ce39859ad5d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableEntryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
@@ -53,6 +54,8 @@ VarLenColumnBulkEntry getEntryBulk(int valuesToRead) {
final int[] valueLengths = entry.getValuesLength();
final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);
+
final byte[] tgtBuff = entry.getInternalDataArray();
final byte[] srcBuff = buffer.array();
final int srcLen = buffer.remaining();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
index 98089fd9373..3869113249b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenNullableFixedEntryReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.PageDataInfo;
@@ -33,19 +34,20 @@
VarLenColumnBulkInputCallback containerCallback) {
super(buffer, pageInfo, columnPrecInfo, entry, containerCallback);
+ Preconditions.checkArgument(columnPrecInfo.precision >= 0, "Fixed length precision cannot be lower than zero");
}
/** {@inheritDoc} */
@Override
final VarLenColumnBulkEntry getEntry(int valuesToRead) {
- assert columnPrecInfo.precision >= 0 : "Fixed length precision cannot be lower than zero";
-
// TODO - We should not use force reload for sparse columns (values with lot of nulls)
load(true); // load new data to process
final int expectedDataLen = columnPrecInfo.precision;
final int entrySz = 4 + columnPrecInfo.precision;
final int readBatch = Math.min(entry.getMaxEntries(), valuesToRead);
+ Preconditions.checkState(readBatch > 0, "Read batch count [%s] should be greater than zero", readBatch);
+
final int[] valueLengths = entry.getValuesLength();
final byte[] tgtBuff = entry.getInternalDataArray();
final byte[] srcBuff = buffer.array();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
index cacd5c8495f..6c8891ff959 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenOverflowReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet.columnreaders;
+import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.VarLenColumnBulkInputCallback;
@@ -80,6 +81,8 @@ VarLenColumnBulkEntry getEntry(int valuesToRead) {
// load some overflow data for processing
final int maxValues = Math.min(entry.getMaxEntries(), valuesToRead);
final int numAvailableValues = overflowDataCache.load(overflowState.currValueIdx, maxValues);
+ Preconditions.checkState(numAvailableValues > 0, "Number values to read [%s] should be greater than zero", numAvailableValues);
+
final int firstValueDataOffset = getDataBufferStartOffset() + adjustDataOffset(overflowState.currValueIdx);
int totalDataLen = 0;
int currValueIdx = overflowState.currValueIdx;
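
For readers skimming the diff, the snippet below is a minimal, self-contained sketch of the pattern the PR applies (the class and helper names here are hypothetical, not from the Drill code; Guava is assumed on the classpath): guard the batch read against a zero record count, and replace plain Java asserts, which are disabled unless the JVM runs with -ea, with Guava Preconditions checks that always fail fast instead of letting a bad count feed an infinite read loop.

// Minimal sketch, hypothetical names; illustrates the sanity-check pattern only.
import com.google.common.base.Preconditions;

public class ParquetSanityCheckSketch {

  // Mirrors the readBatch() guard: skip reading entirely when nothing is left
  // to read, so a zero count can never drive another read iteration.
  static int readBatch(int recordsToRead, int recordsReturnedByReader) {
    int readCount = recordsToRead > 0 ? readRecords(recordsToRead, recordsReturnedByReader) : 0;
    return readCount;
  }

  // Stand-in for a column reader call: unlike 'assert', Preconditions.checkState
  // is enforced in every JVM configuration, so an out-of-range value count throws
  // IllegalStateException immediately instead of passing silently.
  static int readRecords(int requested, int returned) {
    Preconditions.checkState(returned <= requested,
        "Reader cannot return more values than requested [%s > %s]", returned, requested);
    Preconditions.checkState(returned > 0,
        "Read batch count [%s] should be greater than zero", returned);
    return returned;
  }

  public static void main(String[] args) {
    System.out.println(readBatch(0, 0));    // 0: no read attempted, no check triggered
    System.out.println(readBatch(10, 10));  // 10: read proceeds, checks pass
  }
}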
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services