vvysotskyi commented on a change in pull request #1646: DRILL-6852: Adapt 
current Parquet Metadata cache implementation to use Drill Metastore API
URL: https://github.com/apache/drill/pull/1646#discussion_r266027492
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
 ##########
 @@ -219,229 +200,176 @@ public ScanStats getScanStats() {
     Preconditions.checkArgument(!rowGroupsForMinor.isEmpty(),
         String.format("MinorFragmentId %d has no read entries assigned", 
minorFragmentId));
 
-    List<RowGroupReadEntry> entries = new ArrayList<>();
+    List<RowGroupReadEntry> readEntries = new ArrayList<>();
     for (RowGroupInfo rgi : rowGroupsForMinor) {
-      RowGroupReadEntry entry = new RowGroupReadEntry(rgi.getPath(), 
rgi.getStart(), rgi.getLength(), rgi.getRowGroupIndex(), 
rgi.getNumRecordsToRead());
-      entries.add(entry);
+      RowGroupReadEntry entry = new RowGroupReadEntry(rgi.getPath(), 
rgi.getStart(),
+          rgi.getLength(), rgi.getRowGroupIndex(),
+          rgi.getNumRecordsToRead());
+      readEntries.add(entry);
     }
-    return entries;
-  }
-
-  // filter push down methods block start
-  @JsonProperty
-  @Override
-  public LogicalExpression getFilter() {
-    return filter;
-  }
-
-  public void setFilter(LogicalExpression filter) {
-    this.filter = filter;
+    return readEntries;
   }
 
   @Override
-  public AbstractParquetGroupScan applyFilter(LogicalExpression filterExpr, 
UdfUtilities udfUtilities,
+  public AbstractGroupScanWithMetadata applyFilter(LogicalExpression 
filterExpr, UdfUtilities udfUtilities,
       FunctionImplementationRegistry functionImplementationRegistry, 
OptionManager optionManager) {
-
-    if (!parquetTableMetadata.isRowGroupPrunable() ||
-        rowGroupInfos.size() > 
optionManager.getOption(PlannerSettings.PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD))
 {
-      // Stop pruning for 2 cases:
-      //    -  metadata does not have proper format to support row group level 
filter pruning,
-      //    -  # of row groups is beyond 
PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD.
-      return null;
-    }
-
-    final Set<SchemaPath> schemaPathsInExpr = filterExpr.accept(new 
ParquetRGFilterEvaluator.FieldReferenceFinder(), null);
-
-    final List<RowGroupInfo> qualifiedRGs = new 
ArrayList<>(rowGroupInfos.size());
-
-    ParquetFilterPredicate filterPredicate = 
getParquetFilterPredicate(filterExpr, udfUtilities, 
functionImplementationRegistry, optionManager, true);
-
+    // Builds filter for pruning. If filter cannot be built, null should be 
returned.
+    FilterPredicate filterPredicate = getFilterPredicate(filterExpr, 
udfUtilities, functionImplementationRegistry, optionManager, true);
     if (filterPredicate == null) {
+      logger.debug("FilterPredicate cannot be built.");
       return null;
     }
 
-    boolean matchAllRowGroupsLocal = true;
-
-    for (RowGroupInfo rowGroup : rowGroupInfos) {
-      final ColumnExplorer columnExplorer = new ColumnExplorer(optionManager, 
columns);
-      List<String> partitionValues = getPartitionValues(rowGroup);
-      Map<String, String> implicitColValues = 
columnExplorer.populateImplicitColumns(rowGroup.getPath(), partitionValues, 
supportsFileImplicitColumns());
+    Set<SchemaPath> schemaPathsInExpr =
+        filterExpr.accept(new FilterEvaluatorUtils.FieldReferenceFinder(), 
null);
 
-      ParquetMetaStatCollector statCollector = new ParquetMetaStatCollector(
-          parquetTableMetadata,
-          rowGroup.getColumns(),
-          implicitColValues);
+    RowGroupScanFilterer builder = getFilterer().getFiltered(optionManager, 
filterPredicate, schemaPathsInExpr);
 
-      Map<SchemaPath, ColumnStatistics> columnStatisticsMap = 
statCollector.collectColStat(schemaPathsInExpr);
-
-      ParquetFilterPredicate.RowsMatch match = 
ParquetRGFilterEvaluator.matches(filterPredicate,
-          columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, 
rowGroup.getColumns(), schemaPathsInExpr);
-      if (match == ParquetFilterPredicate.RowsMatch.NONE) {
-        continue; // No row comply to the filter => drop the row group
+    if (getRowGroupsMetadata() != null) {
 
 Review comment:
   I have made some changes: when metadata is not available and the method 
returns a collection, it now returns an empty collection instead of null, to 
reduce calls into `TableMetadataProvider`. In the last commit, I added the 
corresponding comment to `TableMetadataProvider` and replaced the unneeded 
null checks with `!isEmpty()` checks.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to