This is an automated email from the ASF dual-hosted git repository.

agozhiy pushed a commit to branch MERGE-200221-00
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 7951e4ca6ec2948df5abdf23a3206be9f53f0c25
Author: Volodymyr Vysotskyi <[email protected]>
AuthorDate: Fri Feb 14 17:51:52 2020 +0200

    DRILL-7565: ANALYZE TABLE ... REFRESH METADATA does not work for empty 
Parquet files
    
    - Fixed the ConvertMetadataAggregateToDirectScanRule rule to distinguish 
array columns correctly and to proceed using other Parquet metadata if such 
columns are found.
    - Added a new implicit column which signals whether an empty result was 
obtained while collecting metadata and helps to distinguish real data results 
from metadata results.
    - Updated the scan to return a row with metadata if the above implicit 
column is present.
    - Added unit tests for checking the correctness of both optional and 
required columns from empty files.
    
    closes #1985
---
 .../java/org/apache/drill/exec/ExecConstants.java  |  21 +-
 .../drill/exec/metastore/ColumnNamesOptions.java   |  19 +-
 .../metastore/analyze/AnalyzeFileInfoProvider.java |   1 +
 .../analyze/MetadataAggregateContext.java          |  24 +-
 .../apache/drill/exec/physical/impl/ScanBatch.java |  16 ++
 .../impl/metadata/MetadataAggregateHelper.java     |  40 ++-
 .../impl/metadata/MetadataControllerBatch.java     |   2 +-
 .../ConvertMetadataAggregateToDirectScanRule.java  |  43 +--
 .../sql/handlers/MetastoreAnalyzeTableHandler.java |  16 +-
 .../exec/server/options/SystemOptionManager.java   |   1 +
 .../apache/drill/exec/store/ColumnExplorer.java    |   7 +-
 .../store/parquet/BaseParquetMetadataProvider.java |   2 +-
 .../java-exec/src/main/resources/drill-module.conf |   1 +
 .../drill/exec/sql/TestMetastoreCommands.java      | 310 +++++++++++++++++++++
 14 files changed, 436 insertions(+), 67 deletions(-)

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 5938ca1..1276384 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -487,31 +487,36 @@ public final class ExecConstants {
   public static final OptionValidator IMPLICIT_SUFFIX_COLUMN_LABEL_VALIDATOR = 
new StringValidator(IMPLICIT_SUFFIX_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.10. Sets the implicit 
column name for the suffix column."));
   public static final String IMPLICIT_FQN_COLUMN_LABEL = 
"drill.exec.storage.implicit.fqn.column.label";
-  public static final OptionValidator IMPLICIT_FQN_COLUMN_LABEL_VALIDATOR = 
new StringValidator(IMPLICIT_FQN_COLUMN_LABEL,
+  public static final StringValidator IMPLICIT_FQN_COLUMN_LABEL_VALIDATOR = 
new StringValidator(IMPLICIT_FQN_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.10. Sets the implicit 
column name for the fqn column."));
   public static final String IMPLICIT_FILEPATH_COLUMN_LABEL = 
"drill.exec.storage.implicit.filepath.column.label";
-  public static final OptionValidator IMPLICIT_FILEPATH_COLUMN_LABEL_VALIDATOR 
= new StringValidator(IMPLICIT_FILEPATH_COLUMN_LABEL,
+  public static final StringValidator IMPLICIT_FILEPATH_COLUMN_LABEL_VALIDATOR 
= new StringValidator(IMPLICIT_FILEPATH_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.10. Sets the implicit 
column name for the filepath column."));
   public static final String IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL = 
"drill.exec.storage.implicit.row_group_index.column.label";
-  public static final OptionValidator 
IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL,
+  public static final StringValidator 
IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.17. Sets the implicit 
column name for the row group index (rgi) column. " +
           "For internal usage when producing Metastore analyze."));
 
   public static final String IMPLICIT_ROW_GROUP_START_COLUMN_LABEL = 
"drill.exec.storage.implicit.row_group_start.column.label";
-  public static final OptionValidator 
IMPLICIT_ROW_GROUP_START_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_START_COLUMN_LABEL,
+  public static final StringValidator 
IMPLICIT_ROW_GROUP_START_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_START_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.17. Sets the implicit 
column name for the row group start (rgs) column. " +
           "For internal usage when producing Metastore analyze."));
 
   public static final String IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL = 
"drill.exec.storage.implicit.row_group_length.column.label";
-  public static final OptionValidator 
IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL,
+  public static final StringValidator 
IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.17. Sets the implicit 
column name for the row group length (rgl) column. " +
           "For internal usage when producing Metastore analyze."));
 
   public static final String IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL = 
"drill.exec.storage.implicit.last_modified_time.column.label";
-  public static final OptionValidator 
IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL,
+  public static final StringValidator 
IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL,
       new OptionDescription("Available as of Drill 1.17. Sets the implicit 
column name for the lastModifiedTime column. " +
           "For internal usage when producing Metastore analyze."));
 
+  public static final String IMPLICIT_PROJECT_METADATA_COLUMN_LABEL = 
"drill.exec.storage.implicit.project_metadata.column.label";
+  public static final StringValidator 
IMPLICIT_PROJECT_METADATA_COLUMN_LABEL_VALIDATOR = new 
StringValidator(IMPLICIT_PROJECT_METADATA_COLUMN_LABEL,
+      new OptionDescription("Available as of Drill 1.18. Sets the implicit 
column name for the $project_metadata$ column. " +
+          "For internal usage when producing Metastore analyze."));
+
   public static final String JSON_READ_NUMBERS_AS_DOUBLE = 
"store.json.read_numbers_as_double";
   public static final BooleanValidator JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR = 
new BooleanValidator(JSON_READ_NUMBERS_AS_DOUBLE,
       new OptionDescription("Reads numbers with or without a decimal point as 
DOUBLE. Prevents schema change errors."));
@@ -1109,14 +1114,14 @@ public final class ExecConstants {
    */
   public static final String METASTORE_USE_SCHEMA_METADATA = 
"metastore.metadata.use_schema";
   public static final BooleanValidator METASTORE_USE_SCHEMA_METADATA_VALIDATOR 
= new BooleanValidator(METASTORE_USE_SCHEMA_METADATA,
-      new OptionDescription("Enables schema usage, stored to the Metastore. 
Default is false. (Drill 1.17+)"));
+      new OptionDescription("Enables schema usage, stored to the Metastore. 
Default is true. (Drill 1.17+)"));
 
   /**
    * Option for enabling statistics usage, stored in the Metastore, at the 
planning stage.
    */
   public static final String METASTORE_USE_STATISTICS_METADATA = 
"metastore.metadata.use_statistics";
   public static final BooleanValidator 
METASTORE_USE_STATISTICS_METADATA_VALIDATOR = new 
BooleanValidator(METASTORE_USE_STATISTICS_METADATA,
-      new OptionDescription("Enables statistics usage, stored in the 
Metastore, at the planning stage. Default is false. (Drill 1.17+)"));
+      new OptionDescription("Enables statistics usage, stored in the 
Metastore, at the planning stage. Default is true. (Drill 1.17+)"));
 
   /**
    * Option for collecting schema and / or column statistics for every table 
after CTAS and CTTAS execution.
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/ColumnNamesOptions.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/ColumnNamesOptions.java
index 0b9faca..7a23c12 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/ColumnNamesOptions.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/ColumnNamesOptions.java
@@ -32,14 +32,16 @@ public class ColumnNamesOptions {
   private final String rowGroupStart;
   private final String rowGroupLength;
   private final String lastModifiedTime;
+  private final String projectMetadataColumn;
 
   public ColumnNamesOptions(OptionManager optionManager) {
-    this.fullyQualifiedName = 
optionManager.getOption(ExecConstants.IMPLICIT_FQN_COLUMN_LABEL).string_val;
-    this.partitionColumnNameLabel = 
optionManager.getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
-    this.rowGroupIndex = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL).string_val;
-    this.rowGroupStart = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_START_COLUMN_LABEL).string_val;
-    this.rowGroupLength = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL).string_val;
-    this.lastModifiedTime = 
optionManager.getOption(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL).string_val;
+    this.fullyQualifiedName = 
optionManager.getOption(ExecConstants.IMPLICIT_FQN_COLUMN_LABEL_VALIDATOR);
+    this.partitionColumnNameLabel = 
optionManager.getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR);
+    this.rowGroupIndex = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL_VALIDATOR);
+    this.rowGroupStart = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_START_COLUMN_LABEL_VALIDATOR);
+    this.rowGroupLength = 
optionManager.getOption(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL_VALIDATOR);
+    this.lastModifiedTime = 
optionManager.getOption(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL_VALIDATOR);
+    this.projectMetadataColumn = 
optionManager.getOption(ExecConstants.IMPLICIT_PROJECT_METADATA_COLUMN_LABEL_VALIDATOR);
   }
 
   public String partitionColumnNameLabel() {
@@ -66,6 +68,10 @@ public class ColumnNamesOptions {
     return lastModifiedTime;
   }
 
+  public String projectMetadataColumn() {
+    return projectMetadataColumn;
+  }
+
   @Override
   public String toString() {
     return new StringJoiner(", ", ColumnNamesOptions.class.getSimpleName() + 
"[", "]")
@@ -75,6 +81,7 @@ public class ColumnNamesOptions {
         .add("rowGroupStart='" + rowGroupStart + "'")
         .add("rowGroupLength='" + rowGroupLength + "'")
         .add("lastModifiedTime='" + lastModifiedTime + "'")
+        .add("projectMetadataColumn='" + projectMetadataColumn + "'")
         .toString();
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeFileInfoProvider.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeFileInfoProvider.java
index a4bf0ad..1d2f2db 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeFileInfoProvider.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeFileInfoProvider.java
@@ -64,6 +64,7 @@ public abstract class AnalyzeFileInfoProvider implements 
AnalyzeInfoProvider {
     List<SchemaPath> projectionList = new ArrayList<>(getSegmentColumns(table, 
columnNamesOptions));
     
projectionList.add(SchemaPath.getSimplePath(columnNamesOptions.fullyQualifiedName()));
     
projectionList.add(SchemaPath.getSimplePath(columnNamesOptions.lastModifiedTime()));
+    
projectionList.add(SchemaPath.getSimplePath(columnNamesOptions.projectMetadataColumn()));
     return Collections.unmodifiableList(projectionList);
   }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataAggregateContext.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataAggregateContext.java
index 9108345..99db025 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataAggregateContext.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataAggregateContext.java
@@ -35,7 +35,11 @@ import java.util.StringJoiner;
 public class MetadataAggregateContext {
   private final List<NamedExpression> groupByExpressions;
   private final List<SchemaPath> interestingColumns;
-  private final List<SchemaPath> excludedColumns;
+
+  /**
+   * List of columns which do not belong to table schema, but used to pass 
some metadata information like file location, row group index, etc.
+   */
+  private final List<SchemaPath> metadataColumns;
   private final boolean createNewAggregations;
   private final MetadataType metadataLevel;
 
@@ -43,7 +47,7 @@ public class MetadataAggregateContext {
     this.groupByExpressions = builder.groupByExpressions;
     this.interestingColumns = builder.interestingColumns;
     this.createNewAggregations = builder.createNewAggregations;
-    this.excludedColumns = builder.excludedColumns;
+    this.metadataColumns = builder.metadataColumns;
     this.metadataLevel = builder.metadataLevel;
   }
 
@@ -63,8 +67,8 @@ public class MetadataAggregateContext {
   }
 
   @JsonProperty
-  public List<SchemaPath> excludedColumns() {
-    return excludedColumns;
+  public List<SchemaPath> metadataColumns() {
+    return metadataColumns;
   }
 
   @JsonProperty
@@ -78,7 +82,7 @@ public class MetadataAggregateContext {
         .add("groupByExpressions=" + groupByExpressions)
         .add("interestingColumns=" + interestingColumns)
         .add("createNewAggregations=" + createNewAggregations)
-        .add("excludedColumns=" + excludedColumns)
+        .add("excludedColumns=" + metadataColumns)
         .toString();
   }
 
@@ -91,7 +95,7 @@ public class MetadataAggregateContext {
         .groupByExpressions(groupByExpressions)
         .interestingColumns(interestingColumns)
         .createNewAggregations(createNewAggregations)
-        .excludedColumns(excludedColumns)
+        .metadataColumns(metadataColumns)
         .metadataLevel(metadataLevel);
   }
 
@@ -101,7 +105,7 @@ public class MetadataAggregateContext {
     private List<SchemaPath> interestingColumns;
     private Boolean createNewAggregations;
     private MetadataType metadataLevel;
-    private List<SchemaPath> excludedColumns;
+    private List<SchemaPath> metadataColumns;
 
     public MetadataAggregateContextBuilder 
groupByExpressions(List<NamedExpression> groupByExpressions) {
       this.groupByExpressions = groupByExpressions;
@@ -123,15 +127,15 @@ public class MetadataAggregateContext {
       return this;
     }
 
-    public MetadataAggregateContextBuilder excludedColumns(List<SchemaPath> 
excludedColumns) {
-      this.excludedColumns = excludedColumns;
+    public MetadataAggregateContextBuilder metadataColumns(List<SchemaPath> 
metadataColumns) {
+      this.metadataColumns = metadataColumns;
       return this;
     }
 
     public MetadataAggregateContext build() {
       Objects.requireNonNull(groupByExpressions, "groupByExpressions were not 
set");
       Objects.requireNonNull(createNewAggregations, "createNewAggregations was 
not set");
-      Objects.requireNonNull(excludedColumns, "excludedColumns were not set");
+      Objects.requireNonNull(metadataColumns, "metadataColumns were not set");
       Objects.requireNonNull(metadataLevel, "metadataLevel was not set");
       return new MetadataAggregateContext(this);
     }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
index e159ec6..d1d1c48 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
@@ -29,6 +29,7 @@ import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.map.CaseInsensitiveMap;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.exception.OutOfMemoryException;
 import org.apache.drill.exec.exception.SchemaChangeException;
 import org.apache.drill.exec.expr.TypeHelper;
@@ -234,6 +235,21 @@ public class ScanBatch implements CloseableRecordBatch {
       logger.trace("currentReader.next return recordCount={}", recordCount);
       Preconditions.checkArgument(recordCount >= 0, "recordCount from 
RecordReader.next() should not be negative");
       boolean isNewSchema = mutator.isNewSchema();
+      // If scan is done for collecting metadata, additional implicit column 
`$project_metadata$`
+      // will be projected to handle the case when scan may return empty 
results (scan on empty file or row group).
+      // Scan will return single row for the case when empty file or row group 
is present with correct
+      // values of other implicit columns (like `fqn`, `rgi`), so this 
metadata will be stored to the Metastore.
+      if (implicitValues != null) {
+        String projectMetadataColumn = 
context.getOptions().getOption(ExecConstants.IMPLICIT_PROJECT_METADATA_COLUMN_LABEL_VALIDATOR);
+        if (recordCount > 0) {
+          // Sets the implicit value to false to signal that some results were 
returned and there is no need for creating an additional record.
+          implicitValues.replace(projectMetadataColumn, 
Boolean.FALSE.toString());
+        } else if 
(Boolean.parseBoolean(implicitValues.get(projectMetadataColumn))) {
+          recordCount++;
+          // Sets implicit value to null to avoid affecting resulting count 
value.
+          implicitValues.put(projectMetadataColumn, null);
+        }
+      }
       populateImplicitVectors();
       mutator.container.setValueCount(recordCount);
       oContext.getStats().batchReceived(0, recordCount, isNewSchema);
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggregateHelper.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggregateHelper.java
index 6f00dea..9122de1 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggregateHelper.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataAggregateHelper.java
@@ -22,7 +22,9 @@ import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.drill.common.expression.ExpressionPosition;
 import org.apache.drill.common.expression.FieldReference;
 import org.apache.drill.common.expression.FunctionCall;
+import org.apache.drill.common.expression.IfExpression;
 import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.NullExpression;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.expression.ValueExpressions;
 import org.apache.drill.common.logical.data.NamedExpression;
@@ -55,6 +57,7 @@ public class MetadataAggregateHelper {
   private final ColumnNamesOptions columnNamesOptions;
   private final BatchSchema schema;
   private final AggPrelBase.OperatorPhase phase;
+  private final List<SchemaPath> excludedColumns;
 
   public MetadataAggregateHelper(MetadataAggregateContext context, 
ColumnNamesOptions columnNamesOptions,
       BatchSchema schema, AggPrelBase.OperatorPhase phase) {
@@ -63,6 +66,8 @@ public class MetadataAggregateHelper {
     this.schema = schema;
     this.phase = phase;
     this.valueExpressions = new ArrayList<>();
+    this.excludedColumns = new ArrayList<>(context.metadataColumns());
+    
excludedColumns.add(SchemaPath.getSimplePath(columnNamesOptions.projectMetadataColumn()));
     createAggregatorInternal();
   }
 
@@ -71,8 +76,6 @@ public class MetadataAggregateHelper {
   }
 
   private void createAggregatorInternal() {
-    List<SchemaPath> excludedColumns = context.excludedColumns();
-
     // Iterates through input expressions and adds aggregate calls for table 
fields
     // to collect required statistics (MIN, MAX, COUNT, etc.) or aggregate 
calls to merge incoming metadata
     getUnflattenedFileds(Lists.newArrayList(schema), null)
@@ -117,16 +120,16 @@ public class MetadataAggregateHelper {
       }
     }
 
-    for (SchemaPath excludedColumn : excludedColumns) {
-      if 
(excludedColumn.equals(SchemaPath.getSimplePath(columnNamesOptions.rowGroupStart()))
-          || 
excludedColumn.equals(SchemaPath.getSimplePath(columnNamesOptions.rowGroupLength())))
 {
-        LogicalExpression lastModifiedTime = new FunctionCall("any_value",
+    for (SchemaPath metadataColumns : context.metadataColumns()) {
+      if 
(metadataColumns.equals(SchemaPath.getSimplePath(columnNamesOptions.rowGroupStart()))
+          || 
metadataColumns.equals(SchemaPath.getSimplePath(columnNamesOptions.rowGroupLength())))
 {
+        LogicalExpression anyValueCall = new FunctionCall("any_value",
             Collections.singletonList(
-                
FieldReference.getWithQuotedRef(excludedColumn.getRootSegmentPath())),
+                
FieldReference.getWithQuotedRef(metadataColumns.getRootSegmentPath())),
             ExpressionPosition.UNKNOWN);
 
-        valueExpressions.add(new NamedExpression(lastModifiedTime,
-            
FieldReference.getWithQuotedRef(excludedColumn.getRootSegmentPath())));
+        valueExpressions.add(new NamedExpression(anyValueCall,
+            
FieldReference.getWithQuotedRef(metadataColumns.getRootSegmentPath())));
       }
     }
 
@@ -207,9 +210,8 @@ public class MetadataAggregateHelper {
    */
   private void addCollectListCall(List<LogicalExpression> fieldList) {
     ArrayList<LogicalExpression> collectListArguments = new 
ArrayList<>(fieldList);
-    List<SchemaPath> excludedColumns = context.excludedColumns();
     // populate columns which weren't included in the schema, but should be 
collected to the COLLECTED_MAP_FIELD
-    for (SchemaPath logicalExpressions : excludedColumns) {
+    for (SchemaPath logicalExpressions : context.metadataColumns()) {
       // adds string literal with field name to the list
       
collectListArguments.add(ValueExpressions.getChar(logicalExpressions.getRootSegmentPath(),
           
DrillRelDataTypeSystem.DRILL_REL_DATATYPE_SYSTEM.getDefaultPrecision(SqlTypeName.VARCHAR)));
@@ -254,7 +256,7 @@ public class MetadataAggregateHelper {
   private void addMetadataAggregateCalls() {
     AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.forEach((statisticsKind, 
sqlKind) -> {
       LogicalExpression call = new FunctionCall(sqlKind.name(),
-          Collections.singletonList(ValueExpressions.getBigInt(1)), 
ExpressionPosition.UNKNOWN);
+          
Collections.singletonList(FieldReference.getWithQuotedRef(columnNamesOptions.projectMetadataColumn())),
 ExpressionPosition.UNKNOWN);
       valueExpressions.add(
           new NamedExpression(call,
               
FieldReference.getWithQuotedRef(AnalyzeColumnUtils.getMetadataStatisticsFieldName(statisticsKind))));
@@ -275,7 +277,6 @@ public class MetadataAggregateHelper {
     for (MaterializedField field : fields) {
       // statistics collecting is not supported for array types
       if (field.getType().getMode() != TypeProtos.DataMode.REPEATED) {
-        List<SchemaPath> excludedColumns = context.excludedColumns();
         // excludedColumns are applied for root fields only
         if (parentFields != null || 
!excludedColumns.contains(SchemaPath.getSimplePath(field.getName()))) {
           List<String> currentPath;
@@ -313,8 +314,19 @@ public class MetadataAggregateHelper {
       if (interestingColumns == null || interestingColumns.contains(fieldRef)) 
{
         // collect statistics for all or only interesting columns if they are 
specified
         
AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.forEach((statisticsKind, 
sqlKind) -> {
+          // constructs "case when is not null projectMetadataColumn then 
column1 else null end" call
+          // to avoid using default values for required columns when data for 
empty result is obtained
+          LogicalExpression caseExpr = IfExpression.newBuilder()
+              .setIfCondition(new IfExpression.IfCondition(
+                  new FunctionCall(
+                      "isnotnull",
+                      
Collections.singletonList(FieldReference.getWithQuotedRef(columnNamesOptions.projectMetadataColumn())),
+                      ExpressionPosition.UNKNOWN), fieldRef))
+              .setElse(NullExpression.INSTANCE)
+              .build();
+
           LogicalExpression call = new FunctionCall(sqlKind.name(),
-              Collections.singletonList(fieldRef), ExpressionPosition.UNKNOWN);
+              Collections.singletonList(caseExpr), ExpressionPosition.UNKNOWN);
           valueExpressions.add(
               new NamedExpression(call,
                   
FieldReference.getWithQuotedRef(AnalyzeColumnUtils.getColumnStatisticsFieldName(fieldName,
 statisticsKind))));
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
index 7314961..8ee3beb 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
@@ -586,9 +586,9 @@ public class MetadataControllerBatch extends 
AbstractBinaryRecordBatch<MetadataC
     Multimap<String, StatisticsHolder<?>> columnStatistics = 
ArrayListMultimap.create();
     Map<String, TypeProtos.MinorType> columnTypes = new HashMap<>();
     for (ColumnMetadata column : columnMetadata) {
-      String fieldName = AnalyzeColumnUtils.getColumnName(column.name());
 
       if (AnalyzeColumnUtils.isColumnStatisticsField(column.name())) {
+        String fieldName = AnalyzeColumnUtils.getColumnName(column.name());
         StatisticsKind<?> statisticsKind = 
AnalyzeColumnUtils.getStatisticsKind(column.name());
         columnStatistics.put(fieldName,
             new 
StatisticsHolder<>(getConvertedColumnValue(reader.column(column.name())), 
statisticsKind));
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
index 80a463b..f9b266e 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
@@ -39,6 +39,7 @@ import 
org.apache.drill.exec.store.ColumnExplorer.ImplicitFileColumns;
 import org.apache.drill.exec.store.dfs.DrillFileSystem;
 import org.apache.drill.exec.store.dfs.FormatSelection;
 import org.apache.drill.exec.store.direct.DirectGroupScan;
+import org.apache.drill.exec.store.parquet.BaseParquetMetadataProvider;
 import org.apache.drill.exec.store.parquet.ParquetGroupScan;
 import org.apache.drill.exec.store.pojo.DynamicPojoRecordReader;
 import org.apache.drill.exec.util.ImpersonationUtil;
@@ -50,7 +51,6 @@ import 
org.apache.drill.metastore.statistics.ColumnStatisticsKind;
 import org.apache.drill.metastore.statistics.ExactStatisticsConstants;
 import org.apache.drill.metastore.statistics.StatisticsKind;
 import org.apache.drill.metastore.statistics.TableStatisticsKind;
-import org.apache.drill.metastore.util.SchemaPathUtils;
 import org.apache.drill.shaded.guava.com.google.common.collect.HashBasedTable;
 import org.apache.drill.shaded.guava.com.google.common.collect.Multimap;
 import org.apache.drill.shaded.guava.com.google.common.collect.Table;
@@ -94,11 +94,11 @@ import java.util.stream.IntStream;
  * </pre>
  */
 public class ConvertMetadataAggregateToDirectScanRule extends RelOptRule {
+  private static final Logger logger = 
LoggerFactory.getLogger(ConvertMetadataAggregateToDirectScanRule.class);
+
   public static final ConvertMetadataAggregateToDirectScanRule INSTANCE =
       new ConvertMetadataAggregateToDirectScanRule();
 
-  private static final Logger logger = 
LoggerFactory.getLogger(ConvertMetadataAggregateToDirectScanRule.class);
-
   public ConvertMetadataAggregateToDirectScanRule() {
     super(
         RelOptHelper.some(MetadataAggRel.class, 
RelOptHelper.any(DrillScanRel.class)),
@@ -210,7 +210,7 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
 
         // do not gather statistics for array columns as it is not supported 
by Metastore
         if (containsArrayColumn(rowGroupMetadata.getSchema(), schemaPath)) {
-          return null;
+          continue;
         }
 
         if (IsPredicate.isNullOrEmpty(columnStatistics)) {
@@ -232,6 +232,8 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
                 columnStatisticsFieldName,
                 statsValue.getClass());
             recordsTable.put(columnStatisticsFieldName, rowIndex, statsValue);
+          } else {
+            recordsTable.put(columnStatisticsFieldName, rowIndex, 
BaseParquetMetadataProvider.NULL_VALUE);
           }
         }
       }
@@ -244,6 +246,8 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
         if (statisticsValue != null) {
           schema.putIfAbsent(metadataStatisticsFieldName, 
statisticsValue.getClass());
           recordsTable.put(metadataStatisticsFieldName, rowIndex, 
statisticsValue);
+        } else {
+          recordsTable.put(metadataStatisticsFieldName, rowIndex, 
BaseParquetMetadataProvider.NULL_VALUE);
         }
       }
 
@@ -258,15 +262,19 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
 
     // DynamicPojoRecordReader requires LinkedHashMap with fields order
     // which corresponds to the value position in record list.
-    LinkedHashMap<String, Class<?>> orderedSchema = 
recordsTable.rowKeySet().stream()
-        .collect(Collectors.toMap(
-            Function.identity(),
-            column -> schema.getOrDefault(column, Integer.class),
-            (o, n) -> n,
-            LinkedHashMap::new));
+    LinkedHashMap<String, Class<?>> orderedSchema = new LinkedHashMap<>();
+    for (String s : recordsTable.rowKeySet()) {
+      Class<?> clazz = schema.get(s);
+      if (clazz != null) {
+        orderedSchema.put(s, clazz);
+      } else {
+        return null;
+      }
+    }
 
     IntFunction<List<Object>> collectRecord = currentIndex -> 
orderedSchema.keySet().stream()
         .map(column -> recordsTable.get(column, currentIndex))
+        .map(value -> value != BaseParquetMetadataProvider.NULL_VALUE ? value 
: null)
         .collect(Collectors.toList());
 
     List<List<Object>> records = IntStream.range(0, rowIndex)
@@ -288,12 +296,11 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
    * @return {@code true} if any segment in the schema path is an array, 
{@code false} otherwise
    */
   private static boolean containsArrayColumn(TupleMetadata schema, SchemaPath 
schemaPath) {
-    ColumnMetadata columnMetadata = 
SchemaPathUtils.getColumnMetadata(schemaPath, schema);
     PathSegment currentPath = schemaPath.getRootSegment();
-    ColumnMetadata currentColumn = columnMetadata;
-    do {
-      if (currentColumn.isArray()) {
-        return false;
+    ColumnMetadata columnMetadata = 
schema.metadata(currentPath.getNameSegment().getPath());
+    while (columnMetadata != null) {
+      if (columnMetadata.isArray()) {
+        return true;
       } else if (columnMetadata.isMap()) {
         currentPath = currentPath.getChild();
         columnMetadata = 
columnMetadata.tupleSchema().metadata(currentPath.getNameSegment().getPath());
@@ -301,9 +308,9 @@ public class ConvertMetadataAggregateToDirectScanRule 
extends RelOptRule {
         currentPath = currentPath.getChild();
         columnMetadata = ((DictColumnMetadata) 
columnMetadata).valueColumnMetadata();
       } else {
-        return true;
+        return false;
       }
-    } while (columnMetadata != null);
-    return true;
+    }
+    return false;
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MetastoreAnalyzeTableHandler.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MetastoreAnalyzeTableHandler.java
index 36eae41..2be8dfc 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MetastoreAnalyzeTableHandler.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/MetastoreAnalyzeTableHandler.java
@@ -406,13 +406,13 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
     SchemaPath lastModifiedTimeField =
         
SchemaPath.getSimplePath(config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL));
 
-    List<SchemaPath> excludedColumns = Arrays.asList(locationField, 
lastModifiedTimeField);
+    List<SchemaPath> metadataColumns = Arrays.asList(locationField, 
lastModifiedTimeField);
 
     MetadataAggregateContext aggregateContext = 
MetadataAggregateContext.builder()
         .groupByExpressions(Collections.emptyList())
         .interestingColumns(statisticsColumns)
         .createNewAggregations(createNewAggregations)
-        .excludedColumns(excludedColumns)
+        .metadataColumns(metadataColumns)
         .metadataLevel(MetadataType.TABLE)
         .build();
 
@@ -433,7 +433,7 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
     SchemaPath lastModifiedTimeField =
         
SchemaPath.getSimplePath(config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL));
 
-    List<SchemaPath> excludedColumns = Arrays.asList(lastModifiedTimeField, 
locationField);
+    List<SchemaPath> metadataColumns = Arrays.asList(lastModifiedTimeField, 
locationField);
 
     List<NamedExpression> groupByExpressions = new 
ArrayList<>(segmentExpressions);
 
@@ -441,7 +441,7 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
         .groupByExpressions(groupByExpressions.subList(0, segmentLevel))
         .interestingColumns(statisticsColumns)
         .createNewAggregations(createNewAggregations)
-        .excludedColumns(excludedColumns)
+        .metadataColumns(metadataColumns)
         .metadataLevel(MetadataType.SEGMENT)
         .build();
 
@@ -461,7 +461,7 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
     SchemaPath lastModifiedTimeField =
         
SchemaPath.getSimplePath(config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL));
 
-    List<SchemaPath> excludedColumns = Arrays.asList(lastModifiedTimeField, 
locationField);
+    List<SchemaPath> metadataColumns = Arrays.asList(lastModifiedTimeField, 
locationField);
 
     NamedExpression locationExpression =
         new NamedExpression(locationField, 
FieldReference.getWithQuotedRef(MetastoreAnalyzeConstants.LOCATION_FIELD));
@@ -472,7 +472,7 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
         .groupByExpressions(fileGroupByExpressions)
         .interestingColumns(statisticsColumns)
         .createNewAggregations(createNewAggregations)
-        .excludedColumns(excludedColumns)
+        .metadataColumns(metadataColumns)
         .metadataLevel(MetadataType.FILE)
         .build();
 
@@ -505,13 +505,13 @@ public class MetastoreAnalyzeTableHandler extends 
DefaultSqlHandler {
     SchemaPath rowGroupLengthField =
         
SchemaPath.getSimplePath(config.getContext().getOptions().getString(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL));
 
-    List<SchemaPath> excludedColumns = Arrays.asList(lastModifiedTimeField, 
locationField, rgiField, rowGroupStartField, rowGroupLengthField);
+    List<SchemaPath> metadataColumns = Arrays.asList(lastModifiedTimeField, 
locationField, rgiField, rowGroupStartField, rowGroupLengthField);
 
     MetadataAggregateContext aggregateContext = 
MetadataAggregateContext.builder()
         .groupByExpressions(rowGroupGroupByExpressions)
         .interestingColumns(statisticsColumns)
         .createNewAggregations(createNewAggregations)
-        .excludedColumns(excludedColumns)
+        .metadataColumns(metadataColumns)
         .metadataLevel(MetadataType.ROW_GROUP)
         .build();
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index e3ed2f6..430e0a1 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -267,6 +267,7 @@ public class SystemOptionManager extends BaseOptionManager 
implements AutoClosea
       new 
OptionDefinition(ExecConstants.IMPLICIT_ROW_GROUP_START_COLUMN_LABEL_VALIDATOR),
       new 
OptionDefinition(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL_VALIDATOR),
       new 
OptionDefinition(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL_VALIDATOR),
+      new 
OptionDefinition(ExecConstants.IMPLICIT_PROJECT_METADATA_COLUMN_LABEL_VALIDATOR),
       new 
OptionDefinition(ExecConstants.CODE_GEN_EXP_IN_METHOD_SIZE_VALIDATOR),
       new 
OptionDefinition(ExecConstants.CREATE_PREPARE_STATEMENT_TIMEOUT_MILLIS_VALIDATOR),
       new 
OptionDefinition(ExecConstants.DYNAMIC_UDF_SUPPORT_ENABLED_VALIDATOR,  new 
OptionMetaData(OptionValue.AccessibleScopes.SYSTEM, true, false)),
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
index d4c45ae..ecb12f3 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
@@ -311,6 +311,9 @@ public class ColumnExplorer {
         case ROW_GROUP_LENGTH:
           implicitValues.put(key, String.valueOf(length));
           break;
+        case PROJECT_METADATA:
+          implicitValues.put(key, Boolean.TRUE.toString());
+          break;
         case LAST_MODIFIED_TIME:
           try {
             implicitValues.put(key, 
String.valueOf(fs.getFileStatus(filePath).getModificationTime()));
@@ -509,7 +512,9 @@ public class ColumnExplorer {
 
     ROW_GROUP_START(ExecConstants.IMPLICIT_ROW_GROUP_START_COLUMN_LABEL),
 
-    ROW_GROUP_LENGTH(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL);
+    ROW_GROUP_LENGTH(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL),
+
+    PROJECT_METADATA(ExecConstants.IMPLICIT_PROJECT_METADATA_COLUMN_LABEL);
 
     private final String name;
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
index b535e0f..f2e177d 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
@@ -77,7 +77,7 @@ public abstract class BaseParquetMetadataProvider implements 
ParquetMetadataProv
   /**
    * {@link HashBasedTable} cannot contain nulls, used this object to 
represent null values.
    */
-  static final Object NULL_VALUE = new Object();
+  public static final Object NULL_VALUE = new Object();
 
   protected final List<ReadEntryWithPath> entries;
   protected final ParquetReaderConfig readerConfig;
diff --git a/exec/java-exec/src/main/resources/drill-module.conf 
b/exec/java-exec/src/main/resources/drill-module.conf
index 7230426..b365047 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -483,6 +483,7 @@ drill.exec.options: {
     drill.exec.storage.implicit.row_group_start.column.label: "rgs",
     drill.exec.storage.implicit.row_group_length.column.label: "rgl",
     drill.exec.storage.implicit.last_modified_time.column.label: "lmt",
+    drill.exec.storage.implicit.project_metadata.column.label: 
"$project_metadata$",
     drill.exec.testing.controls: "{}",
     drill.exec.memory.operator.output_batch_size : 16777216, # 16 MB
     drill.exec.memory.operator.output_batch_size_avail_mem_factor : 0.1,
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
index 51d36f2..394adca 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
@@ -3160,6 +3160,316 @@ public class TestMetastoreCommands extends ClusterTest {
     }
   }
 
+  @Test
+  public void testAnalyzeEmptyNullableParquetTable() throws Exception {
+    File table = dirTestWatcher.copyResourceToRoot(Paths.get("parquet", 
"empty", "simple", "empty_simple.parquet"));
+
+    String tableName = "parquet/empty/simple/empty_simple.parquet";
+
+    TableInfo tableInfo = getTableInfo(tableName, "default");
+
+    TupleMetadata schema = new SchemaBuilder()
+        .addNullable("id", TypeProtos.MinorType.BIGINT)
+        .addNullable("name", TypeProtos.MinorType.VARCHAR)
+        .build();
+
+    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = 
ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
+        .put(SchemaPath.getSimplePath("name"),
+            getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
+        .put(SchemaPath.getSimplePath("id"),
+            getColumnStatistics(null, null, 0L, TypeProtos.MinorType.BIGINT))
+        .build();
+
+    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
+        .tableInfo(tableInfo)
+        .metadataInfo(TABLE_META_INFO)
+        .schema(schema)
+        .location(new Path(table.toURI().getPath()))
+        .columnsStatistics(columnStatistics)
+        .metadataStatistics(Arrays.asList(new StatisticsHolder<>(0L, 
TableStatisticsKind.ROW_COUNT),
+            new StatisticsHolder<>(MetadataType.ALL, 
TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
+        .partitionKeys(Collections.emptyMap())
+        .lastModifiedTime(getMaxLastModified(table))
+        .build();
+
+    try {
+      testBuilder()
+          .sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName)
+          .unOrdered()
+          .baselineColumns("ok", "summary")
+          .baselineValues(true, String.format("Collected / refreshed metadata 
for table [dfs.default.%s]", tableName))
+          .go();
+
+      MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .metastoreTableInfo(tableInfo);
+
+      assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
+
+      BaseTableMetadata tableMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .tableMetadata(tableInfo);
+
+      assertEquals(expectedTableMetadata, tableMetadata);
+
+      List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .filesMetadata(tableInfo, null, null);
+
+      assertEquals(1, filesMetadata.size());
+
+      List<RowGroupMetadata> rowGroupsMetadata = 
cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .rowGroupsMetadata(tableInfo, (String) null, null);
+
+      assertEquals(1, rowGroupsMetadata.size());
+    } finally {
+      run("analyze table dfs.`%s` drop metadata if exists", tableName);
+    }
+  }
+
+  @Test
+  public void testAnalyzeEmptyRequiredParquetTable() throws Exception {
+    String tableName = "analyze_empty_simple_required";
+
+    run("create table dfs.tmp.%s as select 1 as id, 'a' as name from 
(values(1)) where 1 = 2", tableName);
+
+    File table = new File(dirTestWatcher.getDfsTestTmpDir(), tableName);
+
+    TableInfo tableInfo = getTableInfo(tableName, "tmp");
+
+    TupleMetadata schema = new SchemaBuilder()
+        .add("id", TypeProtos.MinorType.INT)
+        .add("name", TypeProtos.MinorType.VARCHAR)
+        .build();
+
+    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = 
ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
+        .put(SchemaPath.getSimplePath("name"),
+            getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
+        .put(SchemaPath.getSimplePath("id"),
+            getColumnStatistics(null, null, 0L, TypeProtos.MinorType.INT))
+        .build();
+
+    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
+        .tableInfo(tableInfo)
+        .metadataInfo(TABLE_META_INFO)
+        .schema(schema)
+        .location(new Path(table.toURI().getPath()))
+        .columnsStatistics(columnStatistics)
+        .metadataStatistics(Arrays.asList(new StatisticsHolder<>(0L, 
TableStatisticsKind.ROW_COUNT),
+            new StatisticsHolder<>(MetadataType.ALL, 
TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
+        .partitionKeys(Collections.emptyMap())
+        .lastModifiedTime(getMaxLastModified(table))
+        .build();
+
+    try {
+      testBuilder()
+          .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
+          .unOrdered()
+          .baselineColumns("ok", "summary")
+          .baselineValues(true, String.format("Collected / refreshed metadata 
for table [dfs.tmp.%s]", tableName))
+          .go();
+
+      MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .metastoreTableInfo(tableInfo);
+
+      assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
+
+      BaseTableMetadata tableMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .tableMetadata(tableInfo);
+
+      assertEquals(expectedTableMetadata, tableMetadata);
+
+      List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .filesMetadata(tableInfo, null, null);
+
+      assertEquals(1, filesMetadata.size());
+
+      List<RowGroupMetadata> rowGroupsMetadata = 
cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .rowGroupsMetadata(tableInfo, (String) null, null);
+
+      assertEquals(1, rowGroupsMetadata.size());
+    } finally {
+      run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+      run("drop table if exists dfs.tmp.`%s`", tableName);
+    }
+  }
+
+  @Test
+  public void testAnalyzeNonEmptyTableWithEmptyFile() throws Exception {
+    String tableName = "parquet_with_empty_file";
+
+    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("parquet", 
"empty", "simple"), Paths.get(tableName));
+
+    TableInfo tableInfo = getTableInfo(tableName, "tmp");
+
+    TupleMetadata schema = new SchemaBuilder()
+        .addNullable("id", TypeProtos.MinorType.BIGINT)
+        .addNullable("name", TypeProtos.MinorType.VARCHAR)
+        .build();
+
+    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = 
ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
+        .put(SchemaPath.getSimplePath("name"),
+            getColumnStatistics("Tom", "Tom", 1L, 
TypeProtos.MinorType.VARCHAR))
+        .put(SchemaPath.getSimplePath("id"),
+            getColumnStatistics(2L, 2L, 1L, TypeProtos.MinorType.BIGINT))
+        .build();
+
+    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
+        .tableInfo(tableInfo)
+        .metadataInfo(TABLE_META_INFO)
+        .schema(schema)
+        .location(new Path(table.toURI().getPath()))
+        .columnsStatistics(columnStatistics)
+        .metadataStatistics(Arrays.asList(new StatisticsHolder<>(1L, 
TableStatisticsKind.ROW_COUNT),
+            new StatisticsHolder<>(MetadataType.ALL, 
TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
+        .partitionKeys(Collections.emptyMap())
+        .lastModifiedTime(getMaxLastModified(table))
+        .build();
+
+    try {
+      testBuilder()
+          .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
+          .unOrdered()
+          .baselineColumns("ok", "summary")
+          .baselineValues(true, String.format("Collected / refreshed metadata 
for table [dfs.tmp.%s]", tableName))
+          .go();
+
+      MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .metastoreTableInfo(tableInfo);
+
+      assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
+
+      BaseTableMetadata tableMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .tableMetadata(tableInfo);
+
+      assertEquals(expectedTableMetadata, tableMetadata);
+
+      List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .filesMetadata(tableInfo, null, null);
+
+      assertEquals(2, filesMetadata.size());
+
+      List<RowGroupMetadata> rowGroupsMetadata = 
cluster.drillbit().getContext()
+          .getMetastoreRegistry()
+          .get()
+          .tables()
+          .basicRequests()
+          .rowGroupsMetadata(tableInfo, (String) null, null);
+
+      assertEquals(2, rowGroupsMetadata.size());
+    } finally {
+      run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+    }
+  }
+
+  @Test
+  public void testSelectEmptyRequiredParquetTable() throws Exception {
+    String tableName = "empty_simple_required";
+
+    run("create table dfs.tmp.%s as select 1 as id, 'a' as name from 
(values(1)) where 1 = 2", tableName);
+
+    try {
+      testBuilder()
+          .sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA", tableName)
+          .unOrdered()
+          .baselineColumns("ok", "summary")
+          .baselineValues(true, String.format("Collected / refreshed metadata 
for table [dfs.tmp.%s]", tableName))
+          .go();
+
+      String query = "select * from dfs.tmp.`%s`";
+
+      queryBuilder()
+          .sql(query, tableName)
+          .planMatcher()
+          .include("usedMetastore=true")
+          .match();
+
+      testBuilder()
+          .sqlQuery(query, tableName)
+          .unOrdered()
+          .baselineColumns("id", "name")
+          .expectsEmptyResultSet()
+          .go();
+    } finally {
+      run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+      run("drop table if exists dfs.tmp.`%s`", tableName);
+    }
+  }
+
+  @Test
+  public void testSelectNonEmptyTableWithEmptyFile() throws Exception {
+    String tableName = "parquet_with_empty_file";
+
+    dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "empty", "simple"), 
Paths.get(tableName));
+
+    try {
+      testBuilder()
+          .sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName)
+          .unOrdered()
+          .baselineColumns("ok", "summary")
+          .baselineValues(true, String.format("Collected / refreshed metadata 
for table [dfs.default.%s]", tableName))
+          .go();
+
+      String query = "select * from dfs.`%s`";
+
+      queryBuilder()
+          .sql(query, tableName)
+          .planMatcher()
+          .include("usedMetastore=true")
+          .match();
+
+      testBuilder()
+          .sqlQuery(query, tableName)
+          .unOrdered()
+          .baselineColumns("id", "name")
+          .baselineValues(2L, "Tom")
+          .go();
+    } finally {
+      run("analyze table dfs.`%s` drop metadata if exists", tableName);
+    }
+  }
+
   private static <T> ColumnStatistics<T> getColumnStatistics(T minValue, T 
maxValue,
       long rowCount, TypeProtos.MinorType minorType) {
     return new ColumnStatistics<>(

Reply via email to