vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement metadata usage for all format plugins URL: https://github.com/apache/drill/pull/2026#discussion_r395142724
########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java ########## @@ -103,34 +110,39 @@ // whether metadata for row groups should be collected to create files, partitions and table metadata private final boolean collectMetadata = false; - public BaseParquetMetadataProvider(List<ReadEntryWithPath> entries, - ParquetReaderConfig readerConfig, - String tableName, - Path tableLocation, - TupleMetadata schema, - DrillStatsTable statsTable) { - this(readerConfig, entries, tableName, tableLocation, schema, statsTable); - } + protected BaseParquetMetadataProvider(Builder<?> builder) { + if (builder.entries != null) { + // reuse previously stored metadata + this.entries = builder.entries; + this.tableName = builder.selectionRoot != null ? builder.selectionRoot.toUri().getPath() : ""; + this.tableLocation = builder.selectionRoot; + } else if (builder.selection != null) { + this.entries = new ArrayList<>(); + this.tableName = builder.selection.getSelectionRoot() != null ? builder.selection.getSelectionRoot().toUri().getPath() : ""; + this.tableLocation = builder.selection.getSelectionRoot(); + } else { + // case of hive parquet table + this.entries = new ArrayList<>(); + this.tableName = null; + this.tableLocation = null; + } - public BaseParquetMetadataProvider(ParquetReaderConfig readerConfig, - List<ReadEntryWithPath> entries, - String tableName, - Path tableLocation, - TupleMetadata schema, - DrillStatsTable statsTable) { - this.entries = entries == null ? new ArrayList<>() : entries; - this.readerConfig = readerConfig == null ? 
ParquetReaderConfig.getDefaultInstance() : readerConfig; - this.tableName = tableName; - this.tableLocation = tableLocation; - this.schema = schema; - this.statsTable = statsTable; - } + SchemaProvider schemaProvider = builder.metadataProviderManager.getSchemaProvider(); + TupleMetadata schema = builder.schema; + // schema passed into the builder has greater priority + if (schema == null && schemaProvider != null) { + try { + schema = schemaProvider.read().getSchema(); Review comment: The inline schema has the highest priority. Also, the schema file may be provided in a table function, but it cannot be used together with the inline schema for the same table within the same query. If the table function wasn't specified, but the schema file is present and `store.table.use_schema_file` is enabled, the schema file will be used. Otherwise, the metastore schema will be used. Only a single schema source may be used. Yes, `ANALYZE TABLE` will use the provided schema. In this builder, the `schema` field is the target resolved schema obtained after group scan deserialization. When the schema is null, `SchemaProvider` will be used, which provides the schema either from the table function or from the schema file. If the schema is still null, the metastore schema is not overridden and will be used. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services