vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement 
metadata usage for all format plugins
URL: https://github.com/apache/drill/pull/2026#discussion_r392716963
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/store/MetastoreFileTableMetadataProvider.java
 ##########
 @@ -291,124 +250,74 @@ public boolean checkMetadataVersion() {
     }
   }
 
-  private ParquetTableMetadataProvider getFallbackTableMetadataProvider() 
throws IOException {
-    if (fallback == null) {
-      fallback = fallbackBuilder == null ? null : fallbackBuilder.build();
-    }
-    return fallback;
-  }
+  public static class Builder<T extends Builder<T>> implements 
FileTableMetadataProviderBuilder<T> {
+    protected final MetastoreMetadataProviderManager metadataProviderManager;
 
-  private void throwIfChanged() {
-    if (basicTablesRequests.hasMetastoreTableInfoChanged(metastoreTableInfo)) {
-      throw 
MetadataException.of(MetadataException.MetadataExceptionType.INCONSISTENT_METADATA);
-    }
-  }
+    // builder for fallback ParquetFileTableMetadataProvider
+    // for the case when required metadata is absent in Metastore
+    protected final TableMetadataProviderBuilder fallback;
 
-  public static class Builder implements 
ParquetFileTableMetadataProviderBuilder {
-    private final MetastoreMetadataProviderManager metadataProviderManager;
+    protected TupleMetadata schema;
 
-    private List<ReadEntryWithPath> entries;
-    private DrillFileSystem fs;
-    private TupleMetadata schema;
+    protected List<String> paths;
 
     private FileSelection selection;
 
-    // builder for fallback ParquetFileTableMetadataProvider
-    // for the case when required metadata is absent in Metastore
-    private final ParquetFileTableMetadataProviderBuilder fallback;
+    private DrillFileSystem fs;
 
     public Builder(MetastoreMetadataProviderManager source) {
-      this.metadataProviderManager = source;
-      this.fallback = new 
ParquetTableMetadataProviderImpl.Builder(FileSystemMetadataProviderManager.init());
+      this(source, new 
SimpleFileTableMetadataProvider.Builder(FileSystemMetadataProviderManager.init()));
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder 
withEntries(List<ReadEntryWithPath> entries) {
-      this.entries = entries;
-      fallback.withEntries(entries);
-      return this;
+    protected Builder(MetastoreMetadataProviderManager source, 
TableMetadataProviderBuilder fallback) {
+      this.metadataProviderManager = source;
+      this.fallback = fallback;
     }
 
     @Override
-    public ParquetFileTableMetadataProviderBuilder withSelectionRoot(Path 
selectionRoot) {
-      fallback.withSelectionRoot(selectionRoot);
-      return this;
+    public T withSchema(TupleMetadata schema) {
+      this.schema = schema;
+      return self();
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder withCacheFileRoot(Path 
cacheFileRoot) {
-      fallback.withCacheFileRoot(cacheFileRoot);
-      return this;
+    public T withSelection(FileSelection selection) {
+      this.selection = selection;
+      return self();
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder 
withReaderConfig(ParquetReaderConfig readerConfig) {
-      fallback.withReaderConfig(readerConfig);
-      return this;
+    public T withFileSystem(DrillFileSystem fs) {
+      this.fs = fs;
+      return self();
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder 
withFileSystem(DrillFileSystem fs) {
-      fallback.withFileSystem(fs);
-      this.fs = fs;
-      return this;
+    protected T self() {
+      return (T) this;
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder 
withCorrectCorruptedDates(boolean autoCorrectCorruptedDates) {
-      fallback.withCorrectCorruptedDates(autoCorrectCorruptedDates);
-      return this;
+    public MetastoreMetadataProviderManager metadataProviderManager() {
+      return metadataProviderManager;
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder withSelection(FileSelection 
selection) {
-      fallback.withSelection(selection);
-      this.selection = selection;
-      return this;
+    public FileSelection selection() {
+      return selection;
     }
 
-    @Override
-    public ParquetFileTableMetadataProviderBuilder withSchema(TupleMetadata 
schema) {
-      fallback.withSchema(schema);
-      this.schema = schema;
-      return this;
+    public DrillFileSystem fs() {
+      return fs;
     }
 
     @Override
-    public ParquetTableMetadataProvider build() throws IOException {
-      MetastoreParquetTableMetadataProvider provider;
-      SchemaProvider schemaProvider = 
metadataProviderManager.getSchemaProvider();
-      ParquetMetadataProvider source = (ParquetTableMetadataProvider) 
metadataProviderManager.getTableMetadataProvider();
-
-      DrillStatsTable statsProvider = 
metadataProviderManager.getStatsProvider();
-      // schema passed into the builder has greater priority
-      try {
-        if (this.schema == null) {
-          schema = schemaProvider != null ? schemaProvider.read().getSchema() 
: null;
-        }
-      } catch (IOException e) {
-        logger.debug("Unable to deserialize schema from schema file for table: 
{}", metadataProviderManager.getTableInfo().name(), e);
-      }
-      if (entries == null) {
-        if (!selection.isExpandedFully()) {
-          entries = DrillFileSystemUtil.listFiles(fs, 
selection.getSelectionRoot(), true).stream()
-              .map(fileStatus -> new 
ReadEntryWithPath(Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath())))
-              .collect(Collectors.toList());
-        } else {
-          entries = selection.getFiles().stream()
-              .map(Path::getPathWithoutSchemeAndAuthority)
-              .map(ReadEntryWithPath::new)
-              .collect(Collectors.toList());
-        }
-      }
-      provider = new MetastoreParquetTableMetadataProvider(entries, 
metadataProviderManager.getMetastoreRegistry(),
-          metadataProviderManager.getTableInfo(), schema, fallback, 
metadataProviderManager.getConfig(), statsProvider);
-      // store results into metadataProviderManager to be able to use them 
when creating new instances
-      // for the case when source wasn't provided or it contains less row 
group metadata than the provider
-      if (source == null || source.getRowGroupsMeta().size() < 
provider.getRowGroupsMeta().size()) {
-        metadataProviderManager.setTableMetadataProvider(provider);
+    public TableMetadataProvider build() throws IOException {
+      if (!selection().isExpandedFully()) {
+        paths = DrillFileSystemUtil.listFiles(fs, 
selection.getSelectionRoot(), true).stream()
+            .map(fileStatus -> 
Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath())
+            .collect(Collectors.toList());
+      } else {
+        paths = selection.getFiles().stream()
+            .map(path -> 
Path.getPathWithoutSchemeAndAuthority(path).toUri().getPath())
+            .collect(Collectors.toList());
 
 Review comment:
   No, it is not cached between queries: files may be deleted or new files may 
be added in the meantime, so reusing a cached file list would cause wrong results.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to