This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch 1.14.0
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 2a5e983a7294633fd0e3e274bcd68d1ad4080fd3
Author: Volodymyr Vysotskyi <vvo...@gmail.com>
AuthorDate: Fri Jul 27 13:49:09 2018 +0300

    DRILL-6641: Fix columnValueCounts in ParquetGroupScanStatistics when ParquetGroupScan has RowGroupInfo without column statistics
---
 .../store/parquet/ParquetGroupScanStatistics.java  | 26 ++++++++++------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
index f7d5687..9381043 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScanStatistics.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.parquet;
 
+import org.apache.commons.lang3.mutable.MutableLong;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.exec.physical.base.GroupScan;
@@ -44,7 +45,7 @@ public class ParquetGroupScanStatistics {
   // only for partition columns : value is unique for each partition
   private Map<SchemaPath, TypeProtos.MajorType> partitionColTypeMap;
   // total number of non-null value for each column in parquet files
-  private Map<SchemaPath, Long> columnValueCounts;
+  private Map<SchemaPath, MutableLong> columnValueCounts;
   // total number of rows (obtained from parquet footer)
   private long rowCount;
 
@@ -61,7 +62,8 @@ public class ParquetGroupScanStatistics {
   }
 
   public long getColumnValueCount(SchemaPath column) {
-    return columnValueCounts.containsKey(column) ? columnValueCounts.get(column) : 0;
+    MutableLong count = columnValueCounts.get(column);
+    return count != null ? count.getValue() : 0;
   }
 
   public List<SchemaPath> getPartitionColumns() {
@@ -87,19 +89,15 @@ public class ParquetGroupScanStatistics {
       long rowCount = rowGroup.getRowCount();
       for (ColumnMetadata column : rowGroup.getColumns()) {
         SchemaPath schemaPath = SchemaPath.getCompoundPath(column.getName());
-        Long previousCount = columnValueCounts.get(schemaPath);
-        if (previousCount != null) {
-          if (previousCount != GroupScan.NO_COLUMN_STATS && column.isNumNullsSet()) {
-            Long newCount = rowCount - column.getNulls();
-            columnValueCounts.put(schemaPath, columnValueCounts.get(schemaPath) + newCount);
-          }
+        MutableLong emptyCount = new MutableLong();
+        MutableLong previousCount = columnValueCounts.putIfAbsent(schemaPath, emptyCount);
+        if (previousCount == null) {
+          previousCount = emptyCount;
+        }
+        if (previousCount.longValue() != GroupScan.NO_COLUMN_STATS && column.isNumNullsSet()) {
+          previousCount.add(rowCount - column.getNulls());
         } else {
-          if (column.isNumNullsSet()) {
-            Long newCount = rowCount - column.getNulls();
-            columnValueCounts.put(schemaPath, newCount);
-          } else {
-            columnValueCounts.put(schemaPath, GroupScan.NO_COLUMN_STATS);
-          }
+          previousCount.setValue(GroupScan.NO_COLUMN_STATS);
         }
         boolean partitionColumn = checkForPartitionColumn(column, first, rowCount, parquetTableMetadata);
         if (partitionColumn) {

Reply via email to