vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement 
metadata usage for all format plugins
URL: https://github.com/apache/drill/pull/2026#discussion_r392715737
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/metastore/store/MetastoreFileTableMetadataProvider.java
 ##########
 @@ -15,149 +15,108 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.drill.exec.metastore;
+package org.apache.drill.exec.metastore.store;
 
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.exception.MetadataException;
-import 
org.apache.drill.exec.metastore.MetastoreMetadataProviderManager.MetastoreMetadataProviderConfig;
+import org.apache.drill.exec.metastore.MetastoreMetadataProviderManager;
 import org.apache.drill.exec.planner.common.DrillStatsTable;
 import org.apache.drill.exec.record.SchemaUtil;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.record.metadata.schema.SchemaProvider;
 import org.apache.drill.exec.store.dfs.DrillFileSystem;
 import org.apache.drill.exec.store.dfs.FileSelection;
-import org.apache.drill.exec.store.dfs.ReadEntryWithPath;
-import 
org.apache.drill.exec.store.parquet.ParquetFileTableMetadataProviderBuilder;
-import org.apache.drill.exec.store.parquet.ParquetReaderConfig;
-import org.apache.drill.exec.store.parquet.ParquetTableMetadataProviderImpl;
 import org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils;
 import org.apache.drill.exec.util.DrillFileSystemUtil;
-import org.apache.drill.metastore.MetastoreRegistry;
 import org.apache.drill.metastore.components.tables.BasicTablesRequests;
 import org.apache.drill.metastore.components.tables.MetastoreTableInfo;
 import org.apache.drill.metastore.metadata.BaseTableMetadata;
 import org.apache.drill.metastore.metadata.FileMetadata;
 import org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata;
 import org.apache.drill.metastore.metadata.PartitionMetadata;
-import org.apache.drill.metastore.metadata.RowGroupMetadata;
 import org.apache.drill.metastore.metadata.SegmentMetadata;
 import org.apache.drill.metastore.metadata.TableInfo;
 import org.apache.drill.metastore.metadata.TableMetadata;
+import org.apache.drill.metastore.metadata.TableMetadataProvider;
+import org.apache.drill.metastore.metadata.TableMetadataProviderBuilder;
 import org.apache.drill.metastore.statistics.ColumnStatistics;
 import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
 import org.apache.drill.metastore.statistics.Statistic;
 import org.apache.drill.metastore.statistics.StatisticsHolder;
 import org.apache.drill.metastore.util.SchemaPathUtils;
-import 
org.apache.drill.shaded.guava.com.google.common.collect.LinkedListMultimap;
-import org.apache.drill.shaded.guava.com.google.common.collect.Multimap;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-public class MetastoreParquetTableMetadataProvider implements 
ParquetTableMetadataProvider {
-  private static final Logger logger = 
LoggerFactory.getLogger(MetastoreParquetTableMetadataProvider.class);
-
-  private final BasicTablesRequests basicTablesRequests;
-  private final TableInfo tableInfo;
-  private final MetastoreTableInfo metastoreTableInfo;
-  private final TupleMetadata schema;
-  private final List<ReadEntryWithPath> entries;
-  private final List<String> paths;
-  private final DrillStatsTable statsProvider;
-
-  private final boolean useSchema;
-  private final boolean useStatistics;
-  private final boolean fallbackToFileMetadata;
-
-  private BaseTableMetadata tableMetadata;
-  private Map<Path, SegmentMetadata> segmentsMetadata;
-  private List<PartitionMetadata> partitions;
-  private Map<Path, FileMetadata> files;
-  private Multimap<Path, RowGroupMetadata> rowGroups;
-  private NonInterestingColumnsMetadata nonInterestingColumnsMetadata;
-  // stores builder to provide lazy init for fallback 
ParquetTableMetadataProvider
-  private final ParquetFileTableMetadataProviderBuilder fallbackBuilder;
-  private ParquetTableMetadataProvider fallback;
-
-  private MetastoreParquetTableMetadataProvider(List<ReadEntryWithPath> 
entries,
-      MetastoreRegistry metastoreRegistry, TableInfo tableInfo, TupleMetadata 
schema,
-      ParquetFileTableMetadataProviderBuilder fallbackBuilder, 
MetastoreMetadataProviderConfig config, DrillStatsTable statsProvider) {
-    this.basicTablesRequests = 
metastoreRegistry.get().tables().basicRequests();
-    this.tableInfo = tableInfo;
-    this.metastoreTableInfo = 
basicTablesRequests.metastoreTableInfo(tableInfo);
-    this.useSchema = config.useSchema();
-    this.useStatistics = config.useStatistics();
-    this.fallbackToFileMetadata = config.fallbackToFileMetadata();
-    this.schema = schema;
-    this.entries = entries == null ? new ArrayList<>() : entries;
-    this.fallbackBuilder = fallbackBuilder;
-    this.statsProvider = statsProvider;
-    this.paths = this.entries.stream()
-        .map(readEntryWithPath -> 
readEntryWithPath.getPath().toUri().getPath())
-        .collect(Collectors.toList());
-  }
-
-  @Override
-  public boolean isUsedMetadataCache() {
-    return false;
-  }
-
-  @Override
-  public Path getSelectionRoot() {
-    return getTableMetadata().getLocation();
-  }
-
-  @Override
-  public List<ReadEntryWithPath> getEntries() {
-    return entries;
-  }
-
-  @Override
-  public List<RowGroupMetadata> getRowGroupsMeta() {
-    return new ArrayList<>(getRowGroupsMetadataMap().values());
-  }
+/**
+ * Implementation of {@link TableMetadataProvider} which uses Drill Metastore 
for providing table metadata
+ * for file-based tables.
+ */
+public class MetastoreFileTableMetadataProvider implements 
TableMetadataProvider {
+  private static final Logger logger = 
LoggerFactory.getLogger(MetastoreFileTableMetadataProvider.class);
+
+  protected final BasicTablesRequests basicTablesRequests;
+  protected final TableInfo tableInfo;
+  protected final MetastoreTableInfo metastoreTableInfo;
+  protected final TupleMetadata schema;
+  protected final List<String> paths;
+  protected final DrillStatsTable statsProvider;
+  protected final TableMetadataProviderBuilder fallbackBuilder;
+
+  protected final boolean useSchema;
+  protected final boolean useStatistics;
+  protected final boolean fallbackToFileMetadata;
+
+  protected BaseTableMetadata tableMetadata;
+  protected Map<Path, SegmentMetadata> segmentsMetadata;
+  protected List<PartitionMetadata> partitions;
+  protected Map<Path, FileMetadata> files;
 
 Review comment:
   When a table has many files, the user can store metadata for partitions only, 
so Drill will do the pruning at the partition level.
   
   Drill is also able to detect that the metadata has changed; in that case, 
it starts planning the query again with the newer version of the metadata. The 
number of attempts is configured using the `metastore.retrieval.retry_attempts` 
option. If the number of attempts is exceeded, the query will be planned 
without using the Metastore.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to