This is an automated email from the ASF dual-hosted git repository.
luoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 37abb0a DRILL-7934: Fix NullPointerException error when reading
parquet files
37abb0a is described below
commit 37abb0a84354e8bdcfc34e67730d01412a041e82
Author: chenx <[email protected]>
AuthorDate: Mon May 31 17:53:22 2021 +0800
DRILL-7934: Fix NullPointerException error when reading parquet files
---
.../store/parquet/ParquetGroupScanStatistics.java | 12 ++++--
.../drill/exec/store/parquet/TestParquetScan.java | 42 +++++++++++++++++++++
.../test/resources/parquet/test_type_null.parquet | Bin 0 -> 1922 bytes
3 files changed, 51 insertions(+), 3 deletions(-)
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
index f15409d..52bddba 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.parquet;
+import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.mutable.MutableLong;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
@@ -115,8 +116,13 @@ public class ParquetGroupScanStatistics<T extends
BaseMetadata & LocationProvide
previousCount.setValue(Statistic.NO_COLUMN_STATS);
}
ColumnMetadata columnMetadata =
SchemaPathUtils.getColumnMetadata(schemaPath, metadata.getSchema());
+ // DRILL-7934
+ // Based on
metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java#145,
+ // list schemas are skipped, so this class cannot obtain the
majorType for such a schemaPath.
+ // Treat a null majorType as "not a partition column" to avoid a
NullPointerException.
TypeProtos.MajorType majorType = columnMetadata != null ?
columnMetadata.majorType() : null;
- boolean partitionColumn = checkForPartitionColumn(statistics, first,
localRowCount, majorType, schemaPath);
+ boolean partitionColumn = majorType != null
+ && checkForPartitionColumn(statistics, first, localRowCount,
majorType, schemaPath);
if (partitionColumn) {
Object value = partitionValueMap.get(metadata.getPath(), schemaPath);
Object currentValue =
ColumnStatisticsKind.MAX_VALUE.getFrom(statistics);
@@ -176,14 +182,14 @@ public class ParquetGroupScanStatistics<T extends
BaseMetadata & LocationProvide
return false;
}
} else {
- if (!partitionColTypeMap.keySet().contains(schemaPath)) {
+ if (!partitionColTypeMap.containsKey(schemaPath)) {
return false;
} else {
if (!hasSingleValue(columnStatistics, rowCount)) {
partitionColTypeMap.remove(schemaPath);
return false;
}
- if (!partitionColTypeMap.get(schemaPath).equals(type)) {
+ if (ObjectUtils.notEqual(partitionColTypeMap.get(schemaPath), type)) {
partitionColTypeMap.remove(schemaPath);
return false;
}
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
index ac61eb7..26372be 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
@@ -28,6 +28,9 @@ import org.junit.experimental.categories.Category;
import java.io.File;
import java.nio.file.Path;
+import static org.apache.drill.test.TestBuilder.listOf;
+import static org.apache.drill.test.TestBuilder.mapOf;
+
@Category({ParquetTest.class, UnlikelyTest.class})
public class TestParquetScan extends BaseTestQuery {
@Test
@@ -51,4 +54,43 @@ public class TestParquetScan extends BaseTestQuery {
.build()
.run();
}
+
+ // DRILL-7934: Fix NullPointerException error when reading parquet files
+ @Test
+ public void testTypeNull() throws Exception {
+ /* the `features` schema is:
+ optional group features {
+ required int32 type (INTEGER(8,true));
+ optional int32 size;
+ optional group indices (LIST) {
+ repeated group list {
+ required int32 element;
+ }
+ }
+ optional group values (LIST) {
+ repeated group list {
+ required double element;
+ }
+ }
+ }
+ */
+ String sql = "SELECT * FROM cp.`parquet/test_type_null.parquet`";
+ testBuilder()
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("label", "features")
+ .baselineValues(0.0d,
+ mapOf("type", 1,
+ "indices", listOf(),
+ "values", listOf(112.0d, 213.0d, 213.0d)))
+ .baselineValues(0.0d,
+ mapOf("type", 1,
+ "indices", listOf(),
+ "values", listOf(213.0d, 123.0d, 123.0d)))
+ .baselineValues(2.0d, mapOf(
+ "type", 1,
+ "indices", listOf(),
+ "values", listOf(333.0d, 333.0d, 333.0d)))
+ .go();
+ }
}
diff --git a/exec/java-exec/src/test/resources/parquet/test_type_null.parquet
b/exec/java-exec/src/test/resources/parquet/test_type_null.parquet
new file mode 100644
index 0000000..4a04324
Binary files /dev/null and
b/exec/java-exec/src/test/resources/parquet/test_type_null.parquet differ