vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement metadata usage for all format plugins URL: https://github.com/apache/drill/pull/2026#discussion_r392716963
########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/metastore/store/MetastoreFileTableMetadataProvider.java ########## @@ -291,124 +250,74 @@ public boolean checkMetadataVersion() { } } - private ParquetTableMetadataProvider getFallbackTableMetadataProvider() throws IOException { - if (fallback == null) { - fallback = fallbackBuilder == null ? null : fallbackBuilder.build(); - } - return fallback; - } + public static class Builder<T extends Builder<T>> implements FileTableMetadataProviderBuilder<T> { + protected final MetastoreMetadataProviderManager metadataProviderManager; - private void throwIfChanged() { - if (basicTablesRequests.hasMetastoreTableInfoChanged(metastoreTableInfo)) { - throw MetadataException.of(MetadataException.MetadataExceptionType.INCONSISTENT_METADATA); - } - } + // builder for fallback ParquetFileTableMetadataProvider + // for the case when required metadata is absent in Metastore + protected final TableMetadataProviderBuilder fallback; - public static class Builder implements ParquetFileTableMetadataProviderBuilder { - private final MetastoreMetadataProviderManager metadataProviderManager; + protected TupleMetadata schema; - private List<ReadEntryWithPath> entries; - private DrillFileSystem fs; - private TupleMetadata schema; + protected List<String> paths; private FileSelection selection; - // builder for fallback ParquetFileTableMetadataProvider - // for the case when required metadata is absent in Metastore - private final ParquetFileTableMetadataProviderBuilder fallback; + private DrillFileSystem fs; public Builder(MetastoreMetadataProviderManager source) { - this.metadataProviderManager = source; - this.fallback = new ParquetTableMetadataProviderImpl.Builder(FileSystemMetadataProviderManager.init()); + this(source, new SimpleFileTableMetadataProvider.Builder(FileSystemMetadataProviderManager.init())); } - @Override - public ParquetFileTableMetadataProviderBuilder withEntries(List<ReadEntryWithPath> entries) { - 
this.entries = entries; - fallback.withEntries(entries); - return this; + protected Builder(MetastoreMetadataProviderManager source, TableMetadataProviderBuilder fallback) { + this.metadataProviderManager = source; + this.fallback = fallback; } @Override - public ParquetFileTableMetadataProviderBuilder withSelectionRoot(Path selectionRoot) { - fallback.withSelectionRoot(selectionRoot); - return this; + public T withSchema(TupleMetadata schema) { + this.schema = schema; + return self(); } - @Override - public ParquetFileTableMetadataProviderBuilder withCacheFileRoot(Path cacheFileRoot) { - fallback.withCacheFileRoot(cacheFileRoot); - return this; + public T withSelection(FileSelection selection) { + this.selection = selection; + return self(); } - @Override - public ParquetFileTableMetadataProviderBuilder withReaderConfig(ParquetReaderConfig readerConfig) { - fallback.withReaderConfig(readerConfig); - return this; + public T withFileSystem(DrillFileSystem fs) { + this.fs = fs; + return self(); } - @Override - public ParquetFileTableMetadataProviderBuilder withFileSystem(DrillFileSystem fs) { - fallback.withFileSystem(fs); - this.fs = fs; - return this; + protected T self() { + return (T) this; } - @Override - public ParquetFileTableMetadataProviderBuilder withCorrectCorruptedDates(boolean autoCorrectCorruptedDates) { - fallback.withCorrectCorruptedDates(autoCorrectCorruptedDates); - return this; + public MetastoreMetadataProviderManager metadataProviderManager() { + return metadataProviderManager; } - @Override - public ParquetFileTableMetadataProviderBuilder withSelection(FileSelection selection) { - fallback.withSelection(selection); - this.selection = selection; - return this; + public FileSelection selection() { + return selection; } - @Override - public ParquetFileTableMetadataProviderBuilder withSchema(TupleMetadata schema) { - fallback.withSchema(schema); - this.schema = schema; - return this; + public DrillFileSystem fs() { + return fs; } @Override - public 
ParquetTableMetadataProvider build() throws IOException { - MetastoreParquetTableMetadataProvider provider; - SchemaProvider schemaProvider = metadataProviderManager.getSchemaProvider(); - ParquetMetadataProvider source = (ParquetTableMetadataProvider) metadataProviderManager.getTableMetadataProvider(); - - DrillStatsTable statsProvider = metadataProviderManager.getStatsProvider(); - // schema passed into the builder has greater priority - try { - if (this.schema == null) { - schema = schemaProvider != null ? schemaProvider.read().getSchema() : null; - } - } catch (IOException e) { - logger.debug("Unable to deserialize schema from schema file for table: {}", metadataProviderManager.getTableInfo().name(), e); - } - if (entries == null) { - if (!selection.isExpandedFully()) { - entries = DrillFileSystemUtil.listFiles(fs, selection.getSelectionRoot(), true).stream() - .map(fileStatus -> new ReadEntryWithPath(Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()))) - .collect(Collectors.toList()); - } else { - entries = selection.getFiles().stream() - .map(Path::getPathWithoutSchemeAndAuthority) - .map(ReadEntryWithPath::new) - .collect(Collectors.toList()); - } - } - provider = new MetastoreParquetTableMetadataProvider(entries, metadataProviderManager.getMetastoreRegistry(), - metadataProviderManager.getTableInfo(), schema, fallback, metadataProviderManager.getConfig(), statsProvider); - // store results into metadataProviderManager to be able to use them when creating new instances - // for the case when source wasn't provided or it contains less row group metadata than the provider - if (source == null || source.getRowGroupsMeta().size() < provider.getRowGroupsMeta().size()) { - metadataProviderManager.setTableMetadataProvider(provider); + public TableMetadataProvider build() throws IOException { + if (!selection().isExpandedFully()) { + paths = DrillFileSystemUtil.listFiles(fs, selection.getSelectionRoot(), true).stream() + .map(fileStatus -> 
Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath()) + .collect(Collectors.toList()); + } else { + paths = selection.getFiles().stream() + .map(path -> Path.getPathWithoutSchemeAndAuthority(path).toUri().getPath()) + .collect(Collectors.toList()); Review comment: No, it is not cached between queries: files may be deleted or new files may be added between queries, so caching it would cause wrong results. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services