This is an automated email from the ASF dual-hosted git repository.
volodymyr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 0acc652 DRILL-7471: DESCRIBE TABLE command fails with
ClassCastException when Metastore is enabled
0acc652 is described below
commit 0acc6526e021509f817cc4b00ae1923b8583c29b
Author: Volodymyr Vysotskyi <[email protected]>
AuthorDate: Fri Dec 6 23:26:19 2019 +0200
DRILL-7471: DESCRIBE TABLE command fails with ClassCastException when
Metastore is enabled
---
.../exec/metastore/analyze/AnalyzeColumnUtils.java | 31 ++--
.../analyze/MetadataControllerContext.java | 20 +++
.../impl/metadata/MetadataControllerBatch.java | 55 ++++---
.../impl/metadata/MetadataHandlerBatch.java | 4 +-
.../ConvertMetadataAggregateToDirectScanRule.java | 6 +-
.../drill/exec/store/ischema/InfoSchemaFilter.java | 10 +-
.../drill/exec/sql/TestMetastoreCommands.java | 175 +++++++++++++++++++--
.../metastore/statistics/ColumnStatisticsKind.java | 20 ++-
.../statistics/ExactStatisticsConstants.java | 1 +
9 files changed, 267 insertions(+), 55 deletions(-)
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
index 296d43d..feeeab7 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/AnalyzeColumnUtils.java
@@ -21,6 +21,7 @@ import org.apache.calcite.sql.SqlKind;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.metastore.statistics.BaseStatisticsKind;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
+import org.apache.drill.metastore.statistics.ExactStatisticsConstants;
import org.apache.drill.metastore.statistics.StatisticsKind;
import org.apache.drill.metastore.statistics.TableStatisticsKind;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
@@ -30,19 +31,19 @@ import java.util.Map;
public class AnalyzeColumnUtils {
private static final String COLUMN_SEPARATOR = "$";
- public static final Map<StatisticsKind, SqlKind> COLUMN_STATISTICS_FUNCTIONS
= ImmutableMap.<StatisticsKind, SqlKind>builder()
+ public static final Map<StatisticsKind<?>, SqlKind>
COLUMN_STATISTICS_FUNCTIONS = ImmutableMap.<StatisticsKind<?>, SqlKind>builder()
.put(ColumnStatisticsKind.MAX_VALUE, SqlKind.MAX)
.put(ColumnStatisticsKind.MIN_VALUE, SqlKind.MIN)
- .put(ColumnStatisticsKind.NON_NULL_COUNT, SqlKind.COUNT)
+ .put(ColumnStatisticsKind.NON_NULL_VALUES_COUNT, SqlKind.COUNT)
.put(TableStatisticsKind.ROW_COUNT, SqlKind.COUNT)
.build();
- public static final Map<StatisticsKind, TypeProtos.MinorType>
COLUMN_STATISTICS_TYPES = ImmutableMap.<StatisticsKind,
TypeProtos.MinorType>builder()
- .put(ColumnStatisticsKind.NON_NULL_COUNT, TypeProtos.MinorType.BIGINT)
+ public static final Map<StatisticsKind<?>, TypeProtos.MinorType>
COLUMN_STATISTICS_TYPES = ImmutableMap.<StatisticsKind<?>,
TypeProtos.MinorType>builder()
+ .put(ColumnStatisticsKind.NON_NULL_VALUES_COUNT,
TypeProtos.MinorType.BIGINT)
.put(TableStatisticsKind.ROW_COUNT, TypeProtos.MinorType.BIGINT)
.build();
- public static final Map<StatisticsKind, SqlKind> META_STATISTICS_FUNCTIONS =
ImmutableMap.<StatisticsKind, SqlKind>builder()
+ public static final Map<StatisticsKind<?>, SqlKind>
META_STATISTICS_FUNCTIONS = ImmutableMap.<StatisticsKind<?>, SqlKind>builder()
.put(TableStatisticsKind.ROW_COUNT, SqlKind.COUNT)
.build();
@@ -65,21 +66,21 @@ public class AnalyzeColumnUtils {
* @param fullName the source of {@link StatisticsKind} to obtain
* @return {@link StatisticsKind} instance
*/
- public static StatisticsKind getStatisticsKind(String fullName) {
+ public static StatisticsKind<?> getStatisticsKind(String fullName) {
String statisticsIdentifier = fullName.split("\\" + COLUMN_SEPARATOR)[1];
switch (statisticsIdentifier) {
- case "minValue":
+ case ExactStatisticsConstants.MIN_VALUE:
return ColumnStatisticsKind.MIN_VALUE;
- case "maxValue":
+ case ExactStatisticsConstants.MAX_VALUE:
return ColumnStatisticsKind.MAX_VALUE;
- case "nullsCount":
+ case ExactStatisticsConstants.NULLS_COUNT:
return ColumnStatisticsKind.NULLS_COUNT;
- case "nonnullrowcount":
- return ColumnStatisticsKind.NON_NULL_COUNT;
- case "rowCount":
+ case ExactStatisticsConstants.NON_NULL_VALUES_COUNT:
+ return ColumnStatisticsKind.NON_NULL_VALUES_COUNT;
+ case ExactStatisticsConstants.ROW_COUNT:
return TableStatisticsKind.ROW_COUNT;
}
- return new BaseStatisticsKind(statisticsIdentifier, false);
+ return new BaseStatisticsKind<>(statisticsIdentifier, false);
}
/**
@@ -93,7 +94,7 @@ public class AnalyzeColumnUtils {
* @param statisticsKind statistics kind
* @return analyze-specific field name which includes actual column name and
statistics kind information
*/
- public static String getColumnStatisticsFieldName(String columnName,
StatisticsKind statisticsKind) {
+ public static String getColumnStatisticsFieldName(String columnName,
StatisticsKind<?> statisticsKind) {
return String.format("column%1$s%2$s%1$s%3$s", COLUMN_SEPARATOR,
statisticsKind.getName(), columnName);
}
@@ -105,7 +106,7 @@ public class AnalyzeColumnUtils {
* @param statisticsKind statistics kind
* @return analyze-specific field name for metadata statistics
*/
- public static String getMetadataStatisticsFieldName(StatisticsKind
statisticsKind) {
+ public static String getMetadataStatisticsFieldName(StatisticsKind<?>
statisticsKind) {
return String.format("metadata%s%s", COLUMN_SEPARATOR,
statisticsKind.getName());
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataControllerContext.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataControllerContext.java
index 157d3a0..b6b6b80 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataControllerContext.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/metastore/analyze/MetadataControllerContext.java
@@ -44,6 +44,7 @@ public class MetadataControllerContext {
private final List<MetadataInfo> metadataToHandle;
private final List<MetadataInfo> metadataToRemove;
private final MetadataType analyzeMetadataLevel;
+ private final boolean multiValueSegments;
private MetadataControllerContext(MetadataControllerContextBuilder builder) {
this.tableInfo = builder.tableInfo;
@@ -54,6 +55,7 @@ public class MetadataControllerContext {
this.metadataToHandle = builder.metadataToHandle;
this.metadataToRemove = builder.metadataToRemove;
this.analyzeMetadataLevel = builder.analyzeMetadataLevel;
+ this.multiValueSegments = builder.multiValueSegments;
}
@JsonProperty
@@ -96,6 +98,18 @@ public class MetadataControllerContext {
return analyzeMetadataLevel;
}
+ /**
+ * Specifies whether metadata controller should create segments with
multiple partition values.
+ * For example, Hive partitions contain multiple partition values within the
same segment.
+ *
+ * @return {@code true} if metadata controller should create segments with
multiple partition values,
+ * {@code false} otherwise
+ */
+ @JsonProperty
+ public boolean multiValueSegments() {
+ return multiValueSegments;
+ }
+
@Override
public String toString() {
return new StringJoiner(",\n",
MetadataControllerContext.class.getSimpleName() + "[", "]")
@@ -123,6 +137,7 @@ public class MetadataControllerContext {
private List<MetadataInfo> metadataToHandle;
private List<MetadataInfo> metadataToRemove;
private MetadataType analyzeMetadataLevel;
+ private boolean multiValueSegments;
public MetadataControllerContextBuilder tableInfo(TableInfo tableInfo) {
this.tableInfo = tableInfo;
@@ -164,6 +179,11 @@ public class MetadataControllerContext {
return this;
}
+ public MetadataControllerContextBuilder multiValueSegments(boolean
multiValueSegments) {
+ this.multiValueSegments = multiValueSegments;
+ return this;
+ }
+
public MetadataControllerContext build() {
Objects.requireNonNull(tableInfo, "tableInfo was not set");
Objects.requireNonNull(location, "location was not set");
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
index ab82769..7ef4741 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataControllerBatch.java
@@ -363,6 +363,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.build();
}
+ @SuppressWarnings("rawtypes")
private List<TableMetadataUnit> getMetadataUnits(TupleReader reader, int
nestingLevel) {
List<TableMetadataUnit> metadataUnits = new ArrayList<>();
@@ -425,6 +426,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
return metadataUnits;
}
+ @SuppressWarnings("rawtypes")
private PartitionMetadata getPartitionMetadata(TupleReader reader,
List<StatisticsHolder> metadataStatistics,
Map<SchemaPath, ColumnStatistics> columnStatistics, int nestingLevel) {
List<String> segmentColumns = popConfig.getContext().segmentColumns();
@@ -456,11 +458,11 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.build();
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings("rawtypes")
private BaseTableMetadata getTableMetadata(TupleReader reader,
List<StatisticsHolder> metadataStatistics,
Map<SchemaPath, ColumnStatistics> columnStatistics) {
List<StatisticsHolder> updatedMetaStats = new
ArrayList<>(metadataStatistics);
- updatedMetaStats.add(new
StatisticsHolder(popConfig.getContext().analyzeMetadataLevel(),
TableStatisticsKind.ANALYZE_METADATA_LEVEL));
+ updatedMetaStats.add(new
StatisticsHolder<>(popConfig.getContext().analyzeMetadataLevel(),
TableStatisticsKind.ANALYZE_METADATA_LEVEL));
MetadataInfo metadataInfo = MetadataInfo.builder()
.type(MetadataType.TABLE)
@@ -489,6 +491,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
return tableMetadata;
}
+ @SuppressWarnings("rawtypes")
private SegmentMetadata getSegmentMetadata(TupleReader reader,
List<StatisticsHolder> metadataStatistics,
Map<SchemaPath, ColumnStatistics> columnStatistics, int nestingLevel) {
List<String> segmentColumns = popConfig.getContext().segmentColumns();
@@ -497,11 +500,17 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
? reader.column(segmentColumns.iterator().next()).scalar().getString()
: MetadataInfo.DEFAULT_SEGMENT_KEY;
- List<String> partitionValues = segmentColumns.stream()
+ // for the case of multi-value segments, there is no nesting
+ // and therefore all values should be used when forming metadata identifier
+ if (popConfig.getContext().multiValueSegments()) {
+ nestingLevel = segmentColumns.size();
+ }
+
+ List<String> allPartitionValues = segmentColumns.stream()
.limit(nestingLevel)
.map(columnName -> reader.column(columnName).scalar().getString())
.collect(Collectors.toList());
- String metadataIdentifier =
MetadataIdentifierUtils.getMetadataIdentifierKey(partitionValues);
+ String metadataIdentifier =
MetadataIdentifierUtils.getMetadataIdentifierKey(allPartitionValues);
MetadataInfo metadataInfo = MetadataInfo.builder()
.type(MetadataType.SEGMENT)
@@ -509,6 +518,14 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null))
.build();
+ int segmentLevel = nestingLevel - 1;
+
+ // for the case of multi-value segments, there is no nesting,
+ // so all partition column values should be used
+ List<String> partitionValues = popConfig.getContext().multiValueSegments()
+ ? allPartitionValues
+ : Collections.singletonList(allPartitionValues.get(segmentLevel));
+
return SegmentMetadata.builder()
.tableInfo(tableInfo)
.metadataInfo(metadataInfo)
@@ -516,13 +533,14 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.metadataStatistics(metadataStatistics)
.path(new
Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString()))
.locations(getIncomingLocations(reader))
- .column(segmentColumns.size() > 0 ?
SchemaPath.getSimplePath(segmentColumns.get(nestingLevel - 1)) : null)
+ .column(segmentColumns.size() > 0 ?
SchemaPath.getSimplePath(segmentColumns.get(segmentLevel)) : null)
.partitionValues(partitionValues)
.lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
.schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
.build();
}
+ @SuppressWarnings("rawtypes")
private FileMetadata getFileMetadata(TupleReader reader,
List<StatisticsHolder> metadataStatistics,
Map<SchemaPath, ColumnStatistics> columnStatistics, int nestingLevel) {
List<String> segmentColumns = popConfig.getContext().segmentColumns();
@@ -556,6 +574,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.build();
}
+ @SuppressWarnings("rawtypes")
private RowGroupMetadata getRowGroupMetadata(TupleReader
reader,List<StatisticsHolder> metadataStatistics,
Map<SchemaPath, ColumnStatistics> columnStatistics, int nestingLevel) {
@@ -594,7 +613,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
.build();
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings("rawtypes")
private Map<SchemaPath, ColumnStatistics> getColumnStatistics(TupleReader
reader, TupleMetadata columnMetadata, Long rowCount) {
Multimap<String, StatisticsHolder> columnStatistics =
ArrayListMultimap.create();
Map<String, TypeProtos.MinorType> columnTypes = new HashMap<>();
@@ -602,9 +621,9 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
String fieldName = AnalyzeColumnUtils.getColumnName(column.name());
if (AnalyzeColumnUtils.isColumnStatisticsField(column.name())) {
- StatisticsKind statisticsKind =
AnalyzeColumnUtils.getStatisticsKind(column.name());
+ StatisticsKind<?> statisticsKind =
AnalyzeColumnUtils.getStatisticsKind(column.name());
columnStatistics.put(fieldName,
- new
StatisticsHolder(getConvertedColumnValue(reader.column(column.name())),
statisticsKind));
+ new
StatisticsHolder<>(getConvertedColumnValue(reader.column(column.name())),
statisticsKind));
if
(statisticsKind.getName().equalsIgnoreCase(ColumnStatisticsKind.MIN_VALUE.getName())
||
statisticsKind.getName().equalsIgnoreCase(ColumnStatisticsKind.MAX_VALUE.getName()))
{
columnTypes.putIfAbsent(fieldName, column.type());
@@ -617,13 +636,13 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
Map<String, StatisticsHolder> nullsCountColumnStatistics = new
HashMap<>();
columnStatistics.asMap().forEach((key, value) ->
value.stream()
- .filter(statisticsHolder -> statisticsHolder.getStatisticsKind()
== ColumnStatisticsKind.NON_NULL_COUNT)
+ .filter(statisticsHolder -> statisticsHolder.getStatisticsKind()
== ColumnStatisticsKind.NON_NULL_VALUES_COUNT)
.findAny()
.map(statisticsHolder -> (Long)
statisticsHolder.getStatisticsValue())
.ifPresent(nonNullCount ->
nullsCountColumnStatistics.put(
key,
- new StatisticsHolder(rowCount - nonNullCount,
ColumnStatisticsKind.NULLS_COUNT))));
+ new StatisticsHolder<>(rowCount - nonNullCount,
ColumnStatisticsKind.NULLS_COUNT))));
nullsCountColumnStatistics.forEach(columnStatistics::put);
}
@@ -631,11 +650,11 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
Map<SchemaPath, ColumnStatistics> resultingStats = new HashMap<>();
columnStatistics.asMap().forEach((fieldName, statisticsHolders) ->
- resultingStats.put(SchemaPath.parseFromString(fieldName), new
ColumnStatistics(statisticsHolders, columnTypes.get(fieldName))));
+ resultingStats.put(SchemaPath.parseFromString(fieldName), new
ColumnStatistics<>(statisticsHolders, columnTypes.get(fieldName))));
return resultingStats;
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings("rawtypes")
private List<StatisticsHolder> getMetadataStatistics(TupleReader reader,
TupleMetadata columnMetadata) {
List<StatisticsHolder> metadataStatistics = new ArrayList<>();
String rgs = columnNamesOptions.rowGroupStart();
@@ -644,15 +663,15 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
String columnName = column.name();
ObjectReader objectReader = reader.column(columnName);
if (AnalyzeColumnUtils.isMetadataStatisticsField(columnName)) {
- metadataStatistics.add(new StatisticsHolder(objectReader.getObject(),
+ metadataStatistics.add(new StatisticsHolder<>(objectReader.getObject(),
AnalyzeColumnUtils.getStatisticsKind(columnName)));
} else if (!objectReader.isNull()) {
if (columnName.equals(rgs)) {
- metadataStatistics.add(new
StatisticsHolder(Long.parseLong(objectReader.scalar().getString()),
- new BaseStatisticsKind(ExactStatisticsConstants.START, true)));
+ metadataStatistics.add(new
StatisticsHolder<>(Long.parseLong(objectReader.scalar().getString()),
+ new BaseStatisticsKind<>(ExactStatisticsConstants.START, true)));
} else if (columnName.equals(rgl)) {
- metadataStatistics.add(new
StatisticsHolder(Long.parseLong(objectReader.scalar().getString()),
- new BaseStatisticsKind(ExactStatisticsConstants.LENGTH, true)));
+ metadataStatistics.add(new
StatisticsHolder<>(Long.parseLong(objectReader.scalar().getString()),
+ new BaseStatisticsKind<>(ExactStatisticsConstants.LENGTH,
true)));
}
}
}
@@ -664,7 +683,7 @@ public class MetadataControllerBatch extends
AbstractBinaryRecordBatch<MetadataC
List<FieldConverter> fieldConverters = new ArrayList<>();
int fieldId = 0;
- for (VectorWrapper wrapper : right) {
+ for (VectorWrapper<?> wrapper : right) {
if
(wrapper.getField().getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD))
{
continue;
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHandlerBatch.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHandlerBatch.java
index 22e90fa..044d140 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHandlerBatch.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/metadata/MetadataHandlerBatch.java
@@ -263,7 +263,7 @@ public class MetadataHandlerBatch extends
AbstractSingleRecordBatch<MetadataHand
baseMetadata.getColumnsStatistics().entrySet().stream()
.sorted(Comparator.comparing(e -> e.getKey().getRootSegmentPath()))
.forEach(entry -> {
- for (StatisticsKind statisticsKind :
AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet()) {
+ for (StatisticsKind<?> statisticsKind :
AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet()) {
MinorType type =
AnalyzeColumnUtils.COLUMN_STATISTICS_TYPES.get(statisticsKind);
type = type != null ? type : entry.getValue().getComparatorType();
schemaBuilder.addNullable(
@@ -272,7 +272,7 @@ public class MetadataHandlerBatch extends
AbstractSingleRecordBatch<MetadataHand
}
});
- for (StatisticsKind statisticsKind :
AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet()) {
+ for (StatisticsKind<?> statisticsKind :
AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet()) {
schemaBuilder.addNullable(
AnalyzeColumnUtils.getMetadataStatisticsFieldName(statisticsKind),
AnalyzeColumnUtils.COLUMN_STATISTICS_TYPES.get(statisticsKind));
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
index 43f6383..ed4038f 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertMetadataAggregateToDirectScanRule.java
@@ -201,16 +201,16 @@ public class ConvertMetadataAggregateToDirectScanRule
extends RelOptRule {
// populates record list with row group column metadata
for (SchemaPath schemaPath : interestingColumns) {
- ColumnStatistics columnStatistics =
rowGroupMetadata.getColumnsStatistics().get(schemaPath);
+ ColumnStatistics<?> columnStatistics =
rowGroupMetadata.getColumnsStatistics().get(schemaPath);
if (IsPredicate.isNullOrEmpty(columnStatistics)) {
logger.debug("Statistics for {} column wasn't found within {} row
group.", schemaPath, path);
return null;
}
- for (StatisticsKind statisticsKind :
AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet()) {
+ for (StatisticsKind<?> statisticsKind :
AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet()) {
Object statsValue;
if
(statisticsKind.getName().equalsIgnoreCase(TableStatisticsKind.ROW_COUNT.getName()))
{
statsValue =
TableStatisticsKind.ROW_COUNT.getValue(rowGroupMetadata);
- } else if
(statisticsKind.getName().equalsIgnoreCase(ColumnStatisticsKind.NON_NULL_COUNT.getName()))
{
+ } else if
(statisticsKind.getName().equalsIgnoreCase(ColumnStatisticsKind.NON_NULL_VALUES_COUNT.getName()))
{
statsValue =
TableStatisticsKind.ROW_COUNT.getValue(rowGroupMetadata) -
ColumnStatisticsKind.NULLS_COUNT.getFrom(columnStatistics);
} else {
statsValue = columnStatistics.get(statisticsKind);
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
index 25bbc7d..b1f1d25 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
@@ -20,6 +20,7 @@ package org.apache.drill.exec.store.ischema;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;
import org.apache.drill.exec.store.ischema.InfoSchemaFilter.ExprNode.Type;
import org.apache.drill.shaded.guava.com.google.common.base.Joiner;
@@ -45,6 +46,8 @@ public class InfoSchemaFilter {
return exprRoot;
}
+ // include type-info to be able to deserialize subclasses correctly
+ @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "@class")
public static class ExprNode {
@JsonProperty
public Type type;
@@ -68,7 +71,8 @@ public class InfoSchemaFilter {
public List<ExprNode> args;
@JsonCreator
- public FunctionExprNode(String function, List<ExprNode> args) {
+ public FunctionExprNode(@JsonProperty("function") String function,
+ @JsonProperty("args") List<ExprNode> args) {
super(Type.FUNCTION);
this.function = function;
this.args = args;
@@ -90,7 +94,7 @@ public class InfoSchemaFilter {
public String field;
@JsonCreator
- public FieldExprNode(String field) {
+ public FieldExprNode(@JsonProperty("field") String field) {
super(Type.FIELD);
this.field = field;
}
@@ -106,7 +110,7 @@ public class InfoSchemaFilter {
public String value;
@JsonCreator
- public ConstantExprNode(String value) {
+ public ConstantExprNode(@JsonProperty("value") String value) {
super(Type.CONSTANT);
this.value = value;
}
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
index 6cf9cc7..ff2108b 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestMetastoreCommands.java
@@ -62,6 +62,9 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -78,6 +81,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
@Category({SlowTest.class, MetastoreTest.class})
+@SuppressWarnings({"rawtypes", "unchecked"})
public class TestMetastoreCommands extends ClusterTest {
private static final TupleMetadata SCHEMA = new SchemaBuilder()
@@ -360,8 +364,8 @@ public class TestMetastoreCommands extends ClusterTest {
.columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
.metadataStatistics(Arrays.asList(
new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(1196L, new
BaseStatisticsKind(ExactStatisticsConstants.LENGTH, true)),
- new StatisticsHolder<>(4L, new
BaseStatisticsKind(ExactStatisticsConstants.START, true))))
+ new StatisticsHolder<>(1196L, new
BaseStatisticsKind<>(ExactStatisticsConstants.LENGTH, true)),
+ new StatisticsHolder<>(4L, new
BaseStatisticsKind<>(ExactStatisticsConstants.START, true))))
.path(new Path(tablePath, "1994/Q1/orders_94_q1.parquet"))
.build();
@@ -438,7 +442,7 @@ public class TestMetastoreCommands extends ClusterTest {
.columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
.metadataStatistics(Collections.singletonList(new
StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
.locations(ImmutableSet.of(new Path(tablePath,
"1994/Q1/orders_94_q1.parquet")))
- .partitionValues(Arrays.asList("1994", "Q1"))
+ .partitionValues(Collections.singletonList("Q1"))
.build();
// verify segment for 1994
@@ -963,7 +967,6 @@ public class TestMetastoreCommands extends ClusterTest {
}
@Test
- @SuppressWarnings("unchecked")
public void testIncrementalAnalyzeNewParentSegment() throws Exception {
String tableName = "multilevel/parquetNewParentSegment";
@@ -979,7 +982,7 @@ public class TestMetastoreCommands extends ClusterTest {
columnStatistics.cloneWith(new ColumnStatistics<>(
Arrays.asList(
new StatisticsHolder<>(160L, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(160L,
ColumnStatisticsKind.NON_NULL_COUNT)))));
+ new StatisticsHolder<>(160L,
ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
updatedStatistics.computeIfPresent(SchemaPath.getSimplePath("dir0"),
(logicalExpressions, columnStatistics) ->
columnStatistics.cloneWith(new ColumnStatistics<>(
@@ -1055,7 +1058,6 @@ public class TestMetastoreCommands extends ClusterTest {
}
@Test
- @SuppressWarnings("unchecked")
public void testIncrementalAnalyzeNewChildSegment() throws Exception {
String tableName = "multilevel/parquetNewChildSegment";
@@ -1071,7 +1073,7 @@ public class TestMetastoreCommands extends ClusterTest {
columnStatistics.cloneWith(new ColumnStatistics(
Arrays.asList(
new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(130L,
ColumnStatisticsKind.NON_NULL_COUNT)))));
+ new StatisticsHolder<>(130L,
ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
updatedStatistics.computeIfPresent(SchemaPath.getSimplePath("dir1"),
(logicalExpressions, columnStatistics) ->
columnStatistics.cloneWith(new ColumnStatistics(
@@ -1140,7 +1142,6 @@ public class TestMetastoreCommands extends ClusterTest {
}
@Test
- @SuppressWarnings("unchecked")
public void testIncrementalAnalyzeNewFile() throws Exception {
String tableName = "multilevel/parquetNewFile";
@@ -1156,7 +1157,7 @@ public class TestMetastoreCommands extends ClusterTest {
columnStatistics.cloneWith(new ColumnStatistics(
Arrays.asList(
new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(130L,
ColumnStatisticsKind.NON_NULL_COUNT)))));
+ new StatisticsHolder<>(130L,
ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
.tableInfo(tableInfo)
@@ -1556,7 +1557,6 @@ public class TestMetastoreCommands extends ClusterTest {
}
@Test
- @SuppressWarnings("unchecked")
public void testIncrementalAnalyzeUpdatedFile() throws Exception {
String tableName = "multilevel/parquetUpdatedFile";
@@ -1772,7 +1772,6 @@ public class TestMetastoreCommands extends ClusterTest {
}
@Test
- @SuppressWarnings("unchecked")
public void testDefaultSegment() throws Exception {
String tableName = "multilevel/parquet/1994/Q1";
File table = dirTestWatcher.copyResourceToTestTmp(Paths.get(tableName),
Paths.get(tableName));
@@ -1804,7 +1803,7 @@ public class TestMetastoreCommands extends ClusterTest {
columnStatistics.cloneWith(new ColumnStatistics(
Arrays.asList(
new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(10L,
ColumnStatisticsKind.NON_NULL_COUNT)))));
+ new StatisticsHolder<>(10L,
ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
.tableInfo(tableInfo)
@@ -2979,6 +2978,156 @@ public class TestMetastoreCommands extends ClusterTest {
}
}
+ @Test
+ public void testDescribeWithMetastore() throws Exception {
+ String tableName = "describeTable";
+
+ File table =
dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"),
Paths.get(tableName));
+
+ try {
+ client.alterSession(PlannerSettings.STATISTICS_USE.getOptionName(),
true);
+
+ testBuilder()
+ .sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName)
+ .unOrdered()
+ .baselineColumns("ok", "summary")
+ .baselineValues(true, String.format("Collected / refreshed metadata
for table [dfs.tmp.%s]", tableName))
+ .go();
+
+ testBuilder()
+ .sqlQuery("describe table dfs.tmp.`%s`", tableName)
+ .unOrdered()
+ .baselineColumns("COLUMN_NAME", "DATA_TYPE", "IS_NULLABLE")
+ .baselineValues("dir0", "CHARACTER VARYING", "YES")
+ .baselineValues("dir1", "CHARACTER VARYING", "YES")
+ .baselineValues("o_orderkey", "INTEGER", "NO")
+ .baselineValues("o_custkey", "INTEGER", "NO")
+ .baselineValues("o_orderstatus", "CHARACTER VARYING", "NO")
+ .baselineValues("o_totalprice", "DOUBLE", "NO")
+ .baselineValues("o_orderdate", "DATE", "NO")
+ .baselineValues("o_orderpriority", "CHARACTER VARYING", "NO")
+ .baselineValues("o_clerk", "CHARACTER VARYING", "NO")
+ .baselineValues("o_shippriority", "INTEGER", "NO")
+ .baselineValues("o_comment", "CHARACTER VARYING", "NO")
+ .go();
+ } finally {
+ run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+ client.resetSession(PlannerSettings.STATISTICS_USE.getOptionName());
+
+ FileUtils.deleteQuietly(table);
+ }
+ }
+
+ @Test
+ public void testSelectFromInfoSchemaTablesWithMetastore() throws Exception {
+ String tableName = "tableInInfoSchema";
+
+ File table =
dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"),
Paths.get(tableName));
+
+ try {
+ testBuilder()
+ .sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName)
+ .unOrdered()
+ .baselineColumns("ok", "summary")
+ .baselineValues(true, String.format("Collected / refreshed metadata
for table [dfs.tmp.%s]", tableName))
+ .go();
+
+ LocalDateTime localDateTime =
getLocalDateTime(getMaxLastModified(table));
+
+ String absolutePath = new
Path(table.toURI().getPath()).toUri().getPath();
+ testBuilder()
+ .sqlQuery("select * from information_schema.`tables` where
TABLE_NAME='%s'", tableName)
+ .unOrdered()
+ .baselineColumns("TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME",
"TABLE_TYPE", "TABLE_SOURCE", "LOCATION", "NUM_ROWS", "LAST_MODIFIED_TIME")
+ .baselineValues("DRILL", "dfs.tmp", tableName, "TABLE", "PARQUET",
absolutePath, 120L, localDateTime)
+ .go();
+ } finally {
+ run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+
+ FileUtils.deleteQuietly(table);
+ }
+ }
+
+ private LocalDateTime getLocalDateTime(long maxLastModified) {
+ return Instant.ofEpochMilli(maxLastModified)
+ .atZone(ZoneId.of("UTC"))
+ .withZoneSameLocal(ZoneId.systemDefault())
+ .toLocalDateTime();
+ }
+
+ @Test
+ public void testSelectFromInfoSchemaColumnsWithMetastore() throws Exception {
+ String tableName = "columnInInfoSchema";
+
+ File table =
dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"),
Paths.get(tableName));
+
+ try {
+ client.alterSession(PlannerSettings.STATISTICS_USE.getOptionName(),
true);
+
+ testBuilder()
+ .sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName)
+ .unOrdered()
+ .baselineColumns("ok", "summary")
+ .baselineValues(true, String.format("Collected / refreshed metadata
for table [dfs.tmp.%s]", tableName))
+ .go();
+
+ testBuilder()
+ .sqlQuery("select * from information_schema.`columns` where
TABLE_NAME='%s' and COLUMN_NAME in ('dir0', 'o_orderkey', 'o_totalprice')",
tableName)
+ .unOrdered()
+ .baselineColumns("TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME",
"COLUMN_NAME", "ORDINAL_POSITION",
+ "COLUMN_DEFAULT", "IS_NULLABLE", "DATA_TYPE",
"CHARACTER_MAXIMUM_LENGTH", "CHARACTER_OCTET_LENGTH",
+ "NUMERIC_PRECISION", "NUMERIC_PRECISION_RADIX", "NUMERIC_SCALE",
"DATETIME_PRECISION", "INTERVAL_TYPE",
+ "INTERVAL_PRECISION", "COLUMN_SIZE", "COLUMN_FORMAT",
"NUM_NULLS", "MIN_VAL", "MAX_VAL", "NDV", "EST_NUM_NON_NULLS", "IS_NESTED")
+ .baselineValues("DRILL", "dfs.tmp", tableName, "dir0", 1, null,
"YES", "CHARACTER VARYING",
+ 65535, 65535, null, null, null, null, null, null, 65535, null,
0L, "1994", "1996", null, null, false)
+ .baselineValues("DRILL", "dfs.tmp", tableName, "o_orderkey", 3,
null, "NO", "INTEGER",
+ null, null, 0, 2, 0, null, null, null, 11, null, 0L, "1",
"1319", 119.0, 120.0, false)
+ .baselineValues("DRILL", "dfs.tmp", tableName, "o_totalprice", 6,
null, "NO", "DOUBLE",
+ null, null, 0, 2, 0, null, null, null, 24, null, 0L, "3266.69",
"350110.21", 120.0, 120.0, false)
+ .go();
+ } finally {
+ run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+ client.resetSession(PlannerSettings.STATISTICS_USE.getOptionName());
+
+ FileUtils.deleteQuietly(table);
+ }
+ }
+
+ @Test
+ public void testSelectFromInfoSchemaPartitionsWithMetastore() throws
Exception {
+ String tableName = "partitionInInfoSchema";
+
+ File table =
dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"),
Paths.get(tableName));
+
+ try {
+ client.resetSession(ExecConstants.SLICE_TARGET);
+ testBuilder()
+ .sqlQuery("analyze table dfs.tmp.`%s` REFRESH METADATA", tableName)
+ .unOrdered()
+ .baselineColumns("ok", "summary")
+ .baselineValues(true, String.format("Collected / refreshed metadata
for table [dfs.tmp.%s]", tableName))
+ .go();
+
+ File seg1994q1 = new File(table, "1994/Q1");
+ File seg1995q2 = new File(table, "1995/Q2");
+ testBuilder()
+ .sqlQuery("select * from information_schema.`partitions` where
TABLE_NAME='%s' and METADATA_IDENTIFIER in ('1994/Q1', '1995/Q2') order by
LOCATION", tableName)
+ .unOrdered()
+ .baselineColumns("TABLE_CATALOG", "TABLE_SCHEMA", "TABLE_NAME",
"METADATA_KEY", "METADATA_TYPE",
+ "METADATA_IDENTIFIER", "PARTITION_COLUMN", "PARTITION_VALUE",
"LOCATION", "LAST_MODIFIED_TIME")
+ .baselineValues("DRILL", "dfs.tmp", tableName, "1994", "SEGMENT",
"1994/Q1", "`dir1`", "Q1",
+ new Path(seg1994q1.toURI().getPath()).toUri().getPath(),
getLocalDateTime(getMaxLastModified(seg1994q1)))
+ .baselineValues("DRILL", "dfs.tmp", tableName, "1995", "SEGMENT",
"1995/Q2", "`dir1`", "Q2",
+ new Path(seg1995q2.toURI().getPath()).toUri().getPath(),
getLocalDateTime(getMaxLastModified(seg1995q2)))
+ .go();
+ } finally {
+ run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
+ client.alterSession(ExecConstants.SLICE_TARGET, 1);
+
+ FileUtils.deleteQuietly(table);
+ }
+ }
+
private static <T> ColumnStatistics<T> getColumnStatistics(T minValue, T
maxValue,
long rowCount, TypeProtos.MinorType minorType) {
return new ColumnStatistics<>(
@@ -2986,7 +3135,7 @@ public class TestMetastoreCommands extends ClusterTest {
new StatisticsHolder<>(minValue, ColumnStatisticsKind.MIN_VALUE),
new StatisticsHolder<>(maxValue, ColumnStatisticsKind.MAX_VALUE),
new StatisticsHolder<>(rowCount, TableStatisticsKind.ROW_COUNT),
- new StatisticsHolder<>(rowCount,
ColumnStatisticsKind.NON_NULL_COUNT),
+ new StatisticsHolder<>(rowCount,
ColumnStatisticsKind.NON_NULL_VALUES_COUNT),
new StatisticsHolder<>(0L, ColumnStatisticsKind.NULLS_COUNT)),
minorType);
}
diff --git
a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ColumnStatisticsKind.java
b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ColumnStatisticsKind.java
index 1b40b7e..613b602 100644
---
a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ColumnStatisticsKind.java
+++
b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ColumnStatisticsKind.java
@@ -92,7 +92,25 @@ public class ColumnStatisticsKind<T> extends
BaseStatisticsKind<T> implements Co
};
/**
- * Column statistics kind which represents number of non-null values for the
specific column.
+ * Column statistics kind which represents exact number of non-null values
for the specific column.
+ */
+ public static final ColumnStatisticsKind<Long> NON_NULL_VALUES_COUNT =
+ new
ColumnStatisticsKind<Long>(ExactStatisticsConstants.NON_NULL_VALUES_COUNT,
true) {
+ @Override
+ public Long mergeStatistics(List<? extends ColumnStatistics>
statisticsList) {
+ long nonNullRowCount = 0;
+ for (ColumnStatistics<?> statistics : statisticsList) {
+ Long nnRowCount = statistics.get(this);
+ if (nnRowCount != null) {
+ nonNullRowCount += nnRowCount;
+ }
+ }
+ return nonNullRowCount;
+ }
+ };
+
+ /**
+ * Column statistics kind which represents estimated number of non-null
values for the specific column.
*/
public static final ColumnStatisticsKind<Double> NON_NULL_COUNT =
new ColumnStatisticsKind<Double>(Statistic.NNROWCOUNT, false) {
diff --git
a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ExactStatisticsConstants.java
b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ExactStatisticsConstants.java
index 38174a9..b96b322 100644
---
a/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ExactStatisticsConstants.java
+++
b/metastore/metastore-api/src/main/java/org/apache/drill/metastore/statistics/ExactStatisticsConstants.java
@@ -22,6 +22,7 @@ public interface ExactStatisticsConstants {
String MAX_VALUE = "maxValue";
String ROW_COUNT = "rowCount";
String NULLS_COUNT = "nullsCount";
+ String NON_NULL_VALUES_COUNT = "nonNullValuesCount";
String START = "start";
String LENGTH = "length";