This is an automated email from the ASF dual-hosted git repository.

volodymyr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit dcc25800902dea81e87e2c7e3ddfb7fd9b281b42
Author: Vlad Rozov <[email protected]>
AuthorDate: Wed Jun 27 10:45:49 2018 -0700

    DRILL-6554: Minor code improvements in parquet statistics handling
    
    closes #1349
---
 .../apache/drill/exec/expr/stat/ParquetIsPredicate.java  | 16 ++++++++--------
 .../drill/exec/expr/stat/ParquetPredicatesHelper.java    | 16 ++++++++--------
 .../drill/exec/store/parquet/ParquetReaderUtility.java   | 15 ++++-----------
 .../drill/exec/store/parquet/metadata/Metadata.java      |  2 +-
 4 files changed, 21 insertions(+), 28 deletions(-)

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
index 547dc06..42e6e0b 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
@@ -113,9 +113,9 @@ public class ParquetIsPredicate<C extends Comparable<C>> 
extends LogicalExpressi
    * IS TRUE predicate.
    */
   private static LogicalExpression createIsTruePredicate(LogicalExpression 
expr) {
-    return new ParquetIsPredicate<Boolean>(expr,
+    return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
         //if max value is not true or if there are all nulls  -> canDrop
-        (exprStat, evaluator) -> !((BooleanStatistics)exprStat).getMax() || 
isAllNulls(exprStat, evaluator.getRowCount())
+        isAllNulls(exprStat, evaluator.getRowCount()) || 
exprStat.hasNonNullValue() && !((BooleanStatistics) exprStat).getMax()
     );
   }
 
@@ -123,9 +123,9 @@ public class ParquetIsPredicate<C extends Comparable<C>> 
extends LogicalExpressi
    * IS FALSE predicate.
    */
   private static LogicalExpression createIsFalsePredicate(LogicalExpression 
expr) {
-    return new ParquetIsPredicate<Boolean>(expr,
+    return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
         //if min value is not false or if there are all nulls  -> canDrop
-        (exprStat, evaluator) -> ((BooleanStatistics)exprStat).getMin() || 
isAllNulls(exprStat, evaluator.getRowCount())
+        isAllNulls(exprStat, evaluator.getRowCount()) || 
exprStat.hasNonNullValue() && ((BooleanStatistics) exprStat).getMin()
     );
   }
 
@@ -133,9 +133,9 @@ public class ParquetIsPredicate<C extends Comparable<C>> 
extends LogicalExpressi
    * IS NOT TRUE predicate.
    */
   private static LogicalExpression createIsNotTruePredicate(LogicalExpression 
expr) {
-    return new ParquetIsPredicate<Boolean>(expr,
+    return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
         //if min value is not false or if there are no nulls  -> canDrop
-        (exprStat, evaluator) -> ((BooleanStatistics)exprStat).getMin() && 
hasNoNulls(exprStat)
+        hasNoNulls(exprStat) && exprStat.hasNonNullValue() && 
((BooleanStatistics) exprStat).getMin()
     );
   }
 
@@ -143,9 +143,9 @@ public class ParquetIsPredicate<C extends Comparable<C>> 
extends LogicalExpressi
    * IS NOT FALSE predicate.
    */
   private static LogicalExpression createIsNotFalsePredicate(LogicalExpression 
expr) {
-    return new ParquetIsPredicate<Boolean>(expr,
+    return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
         //if max value is not true or if there are no nulls  -> canDrop
-        (exprStat, evaluator) -> !((BooleanStatistics)exprStat).getMax() && 
hasNoNulls(exprStat)
+        hasNoNulls(exprStat) && exprStat.hasNonNullValue() && 
!((BooleanStatistics) exprStat).getMax()
     );
   }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
index f804a7b..de4df1f 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.expr.stat;
 
+import org.apache.parquet.Preconditions;
 import org.apache.parquet.column.statistics.Statistics;
 
 /**
@@ -28,7 +29,7 @@ class ParquetPredicatesHelper {
 
   /**
    * @param stat statistics object
-   * @return true if the input stat object has valid statistics; false 
otherwise
+   * @return <tt>true</tt> if the input stat object has valid statistics; 
false otherwise
    */
   static boolean isNullOrEmpty(Statistics stat) {
     return stat == null || stat.isEmpty();
@@ -39,22 +40,21 @@ class ParquetPredicatesHelper {
    *
    * @param stat parquet column statistics
    * @param rowCount number of rows in the parquet file
-   * @return True if all rows are null in the parquet file
-   *          False if at least one row is not null.
+   * @return <tt>true</tt> if all rows are null in the parquet file and 
<tt>false</tt> otherwise
    */
   static boolean isAllNulls(Statistics stat, long rowCount) {
-    return stat.isNumNullsSet() && stat.getNumNulls() == rowCount;
+    Preconditions.checkArgument(rowCount >= 0, String.format("negative 
rowCount %d is not valid", rowCount));
+    return stat.getNumNulls() == rowCount;
   }
 
   /**
-   * Checks that column chunk's statistics has at least one null
+   * Checks that column chunk's statistics does not have nulls
    *
    * @param stat parquet column statistics
-   * @return True if the parquet file has nulls
-   *          False if the parquet file hasn't nulls.
+   * @return <tt>true</tt> if the parquet file does not have nulls and 
<tt>false</tt> otherwise
    */
   static boolean hasNoNulls(Statistics stat) {
-    return !stat.isNumNullsSet() || stat.getNumNulls() == 0;
+    return stat.getNumNulls() <= 0;
   }
 
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
index 40203f5..a7f78fb 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
@@ -32,7 +32,7 @@ import org.apache.drill.exec.work.ExecErrorConstants;
 import org.apache.parquet.SemanticVersion;
 import org.apache.parquet.VersionParser;
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.statistics.IntStatistics;
 import org.apache.parquet.format.ConvertedType;
 import org.apache.parquet.format.FileMetaData;
 import org.apache.parquet.format.SchemaElement;
@@ -417,16 +417,9 @@ public class ParquetReaderUtility {
             // column does not appear in this file, skip it
             continue;
           }
-          Statistics statistics = 
footer.getBlocks().get(rowGroupIndex).getColumns().get(colIndex).getStatistics();
-          Integer max = (Integer) statistics.genericGetMax();
-          if (statistics.hasNonNullValue()) {
-            if (max > ParquetReaderUtility.DATE_CORRUPTION_THRESHOLD) {
-              return DateCorruptionStatus.META_SHOWS_CORRUPTION;
-            }
-          } else {
-            // no statistics, go check the first page
-            return DateCorruptionStatus.META_UNCLEAR_TEST_VALUES;
-          }
+          IntStatistics statistics = (IntStatistics) 
footer.getBlocks().get(rowGroupIndex).getColumns().get(colIndex).getStatistics();
+          return (statistics.hasNonNullValue() && 
statistics.compareMaxToValue(ParquetReaderUtility.DATE_CORRUPTION_THRESHOLD) > 
0) ?
+              DateCorruptionStatus.META_SHOWS_CORRUPTION : 
DateCorruptionStatus.META_UNCLEAR_TEST_VALUES;
         }
       }
     }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
index a61cc18..2259169 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java
@@ -473,7 +473,7 @@ public class Metadata {
           // Write stats when they are not null
           Object minValue = null;
           Object maxValue = null;
-          if (stats.genericGetMax() != null && stats.genericGetMin() != null ) 
{
+          if (stats.hasNonNullValue()) {
             minValue = stats.genericGetMin();
             maxValue = stats.genericGetMax();
             if (containsCorruptDates == 
ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION

Reply via email to