vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement metadata usage for all format plugins URL: https://github.com/apache/drill/pull/2026#discussion_r392708620
########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java ########## @@ -103,34 +110,39 @@ // whether metadata for row groups should be collected to create files, partitions and table metadata private final boolean collectMetadata = false; - public BaseParquetMetadataProvider(List<ReadEntryWithPath> entries, - ParquetReaderConfig readerConfig, - String tableName, - Path tableLocation, - TupleMetadata schema, - DrillStatsTable statsTable) { - this(readerConfig, entries, tableName, tableLocation, schema, statsTable); - } + protected BaseParquetMetadataProvider(Builder<?> builder) { + if (builder.entries != null) { + // reuse previously stored metadata + this.entries = builder.entries; + this.tableName = builder.selectionRoot != null ? builder.selectionRoot.toUri().getPath() : ""; + this.tableLocation = builder.selectionRoot; + } else if (builder.selection != null) { + this.entries = new ArrayList<>(); + this.tableName = builder.selection.getSelectionRoot() != null ? builder.selection.getSelectionRoot().toUri().getPath() : ""; + this.tableLocation = builder.selection.getSelectionRoot(); + } else { + // case of hive parquet table + this.entries = new ArrayList<>(); + this.tableName = null; + this.tableLocation = null; + } - public BaseParquetMetadataProvider(ParquetReaderConfig readerConfig, - List<ReadEntryWithPath> entries, - String tableName, - Path tableLocation, - TupleMetadata schema, - DrillStatsTable statsTable) { - this.entries = entries == null ? new ArrayList<>() : entries; - this.readerConfig = readerConfig == null ? 
ParquetReaderConfig.getDefaultInstance() : readerConfig; - this.tableName = tableName; - this.tableLocation = tableLocation; - this.schema = schema; - this.statsTable = statsTable; - } + SchemaProvider schemaProvider = builder.metadataProviderManager.getSchemaProvider(); + TupleMetadata schema = builder.schema; + // schema passed into the builder has greater priority + if (schema == null && schemaProvider != null) { + try { + schema = schemaProvider.read().getSchema(); Review comment: This code is part of schema provisioning: the schema read here is applied later during the scan, following the existing schema provisioning rules for the formats that support it. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services