This is an automated email from the ASF dual-hosted git repository.

luoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 37abb0a  DRILL-7934: Fix NullPointerException error when reading 
parquet files
37abb0a is described below

commit 37abb0a84354e8bdcfc34e67730d01412a041e82
Author: chenx <[email protected]>
AuthorDate: Mon May 31 17:53:22 2021 +0800

    DRILL-7934: Fix NullPointerException error when reading parquet files
---
 .../store/parquet/ParquetGroupScanStatistics.java  |  12 ++++--
 .../drill/exec/store/parquet/TestParquetScan.java  |  42 +++++++++++++++++++++
 .../test/resources/parquet/test_type_null.parquet  | Bin 0 -> 1922 bytes
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
index f15409d..52bddba 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet;
 
+import org.apache.commons.lang3.ObjectUtils;
 import org.apache.commons.lang3.mutable.MutableLong;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
@@ -115,8 +116,13 @@ public class ParquetGroupScanStatistics<T extends 
BaseMetadata & LocationProvide
           previousCount.setValue(Statistic.NO_COLUMN_STATS);
         }
         ColumnMetadata columnMetadata = 
SchemaPathUtils.getColumnMetadata(schemaPath, metadata.getSchema());
+        // DRILL-7934
+        // Based on 
metastore/metastore-api/src/main/java/org/apache/drill/metastore/util/SchemaPathUtils.java#145,
+        // list schemas are skipped, so this class cannot resolve the 
majorType from the schemaPath.
+        // Treat a null majorType as "not a partition column" (false) to avoid a 
NullPointerException.
         TypeProtos.MajorType majorType = columnMetadata != null ? 
columnMetadata.majorType() : null;
-        boolean partitionColumn = checkForPartitionColumn(statistics, first, 
localRowCount, majorType, schemaPath);
+        boolean partitionColumn = majorType != null
+                && checkForPartitionColumn(statistics, first, localRowCount, 
majorType, schemaPath);
         if (partitionColumn) {
           Object value = partitionValueMap.get(metadata.getPath(), schemaPath);
           Object currentValue = 
ColumnStatisticsKind.MAX_VALUE.getFrom(statistics);
@@ -176,14 +182,14 @@ public class ParquetGroupScanStatistics<T extends 
BaseMetadata & LocationProvide
         return false;
       }
     } else {
-      if (!partitionColTypeMap.keySet().contains(schemaPath)) {
+      if (!partitionColTypeMap.containsKey(schemaPath)) {
         return false;
       } else {
         if (!hasSingleValue(columnStatistics, rowCount)) {
           partitionColTypeMap.remove(schemaPath);
           return false;
         }
-        if (!partitionColTypeMap.get(schemaPath).equals(type)) {
+        if (ObjectUtils.notEqual(partitionColTypeMap.get(schemaPath), type)) {
           partitionColTypeMap.remove(schemaPath);
           return false;
         }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
index ac61eb7..26372be 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetScan.java
@@ -28,6 +28,9 @@ import org.junit.experimental.categories.Category;
 import java.io.File;
 import java.nio.file.Path;
 
+import static org.apache.drill.test.TestBuilder.listOf;
+import static org.apache.drill.test.TestBuilder.mapOf;
+
 @Category({ParquetTest.class, UnlikelyTest.class})
 public class TestParquetScan extends BaseTestQuery {
   @Test
@@ -51,4 +54,43 @@ public class TestParquetScan extends BaseTestQuery {
         .build()
         .run();
   }
+
+  // DRILL-7934: Fix NullPointerException error when reading parquet files
+  @Test
+  public void testTypeNull() throws Exception {
+    /* the `features` schema is:
+    optional group features {
+      required int32 type (INTEGER(8,true));
+      optional int32 size;
+      optional group indices (LIST) {
+        repeated group list {
+          required int32 element;
+        }
+      }
+      optional group values (LIST) {
+        repeated group list {
+          required double element;
+        }
+      }
+    }
+    */
+    String sql = "SELECT * FROM cp.`parquet/test_type_null.parquet`";
+    testBuilder()
+            .sqlQuery(sql)
+            .unOrdered()
+            .baselineColumns("label", "features")
+            .baselineValues(0.0d,
+                    mapOf("type", 1,
+                    "indices", listOf(),
+                    "values", listOf(112.0d, 213.0d, 213.0d)))
+            .baselineValues(0.0d,
+                    mapOf("type", 1,
+                    "indices", listOf(),
+                    "values", listOf(213.0d, 123.0d, 123.0d)))
+            .baselineValues(2.0d, mapOf(
+                    "type", 1,
+                    "indices", listOf(),
+                    "values", listOf(333.0d, 333.0d, 333.0d)))
+            .go();
+  }
 }
diff --git a/exec/java-exec/src/test/resources/parquet/test_type_null.parquet 
b/exec/java-exec/src/test/resources/parquet/test_type_null.parquet
new file mode 100644
index 0000000..4a04324
Binary files /dev/null and 
b/exec/java-exec/src/test/resources/parquet/test_type_null.parquet differ

Reply via email to