vvysotskyi commented on a change in pull request #1646: DRILL-6852: Adapt 
current Parquet Metadata cache implementation to use Drill Metastore API
URL: https://github.com/apache/drill/pull/1646#discussion_r265792525
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
 ##########
 @@ -219,229 +200,176 @@ public ScanStats getScanStats() {
     Preconditions.checkArgument(!rowGroupsForMinor.isEmpty(),
         String.format("MinorFragmentId %d has no read entries assigned", 
minorFragmentId));
 
-    List<RowGroupReadEntry> entries = new ArrayList<>();
+    List<RowGroupReadEntry> readEntries = new ArrayList<>();
     for (RowGroupInfo rgi : rowGroupsForMinor) {
-      RowGroupReadEntry entry = new RowGroupReadEntry(rgi.getPath(), 
rgi.getStart(), rgi.getLength(), rgi.getRowGroupIndex(), 
rgi.getNumRecordsToRead());
-      entries.add(entry);
+      RowGroupReadEntry entry = new RowGroupReadEntry(rgi.getPath(), 
rgi.getStart(),
+          rgi.getLength(), rgi.getRowGroupIndex(),
+          rgi.getNumRecordsToRead());
+      readEntries.add(entry);
     }
-    return entries;
-  }
-
-  // filter push down methods block start
-  @JsonProperty
-  @Override
-  public LogicalExpression getFilter() {
-    return filter;
-  }
-
-  public void setFilter(LogicalExpression filter) {
-    this.filter = filter;
+    return readEntries;
   }
 
   @Override
-  public AbstractParquetGroupScan applyFilter(LogicalExpression filterExpr, 
UdfUtilities udfUtilities,
+  public AbstractGroupScanWithMetadata applyFilter(LogicalExpression 
filterExpr, UdfUtilities udfUtilities,
       FunctionImplementationRegistry functionImplementationRegistry, 
OptionManager optionManager) {
-
-    if (!parquetTableMetadata.isRowGroupPrunable() ||
-        rowGroupInfos.size() > 
optionManager.getOption(PlannerSettings.PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD))
 {
-      // Stop pruning for 2 cases:
-      //    -  metadata does not have proper format to support row group level 
filter pruning,
-      //    -  # of row groups is beyond 
PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD.
-      return null;
-    }
-
-    final Set<SchemaPath> schemaPathsInExpr = filterExpr.accept(new 
ParquetRGFilterEvaluator.FieldReferenceFinder(), null);
-
-    final List<RowGroupInfo> qualifiedRGs = new 
ArrayList<>(rowGroupInfos.size());
-
-    ParquetFilterPredicate filterPredicate = 
getParquetFilterPredicate(filterExpr, udfUtilities, 
functionImplementationRegistry, optionManager, true);
-
+    // Builds filter for pruning. If filter cannot be built, null should be 
returned.
+    FilterPredicate filterPredicate = getFilterPredicate(filterExpr, 
udfUtilities, functionImplementationRegistry, optionManager, true);
     if (filterPredicate == null) {
+      logger.debug("FilterPredicate cannot be built.");
       return null;
     }
 
-    boolean matchAllRowGroupsLocal = true;
-
-    for (RowGroupInfo rowGroup : rowGroupInfos) {
-      final ColumnExplorer columnExplorer = new ColumnExplorer(optionManager, 
columns);
-      List<String> partitionValues = getPartitionValues(rowGroup);
-      Map<String, String> implicitColValues = 
columnExplorer.populateImplicitColumns(rowGroup.getPath(), partitionValues, 
supportsFileImplicitColumns());
+    Set<SchemaPath> schemaPathsInExpr =
+        filterExpr.accept(new FilterEvaluatorUtils.FieldReferenceFinder(), 
null);
 
-      ParquetMetaStatCollector statCollector = new ParquetMetaStatCollector(
-          parquetTableMetadata,
-          rowGroup.getColumns(),
-          implicitColValues);
+    RowGroupScanFilterer builder = getFilterer().getFiltered(optionManager, 
filterPredicate, schemaPathsInExpr);
 
-      Map<SchemaPath, ColumnStatistics> columnStatisticsMap = 
statCollector.collectColStat(schemaPathsInExpr);
-
-      ParquetFilterPredicate.RowsMatch match = 
ParquetRGFilterEvaluator.matches(filterPredicate,
-          columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, 
rowGroup.getColumns(), schemaPathsInExpr);
-      if (match == ParquetFilterPredicate.RowsMatch.NONE) {
-        continue; // No row comply to the filter => drop the row group
+    if (getRowGroupsMetadata() != null) {
 
 Review comment:
   Good question! In the current code it cannot be null, because 
`ParquetMetadataProvider` never returns null from its `getRowGroupsMeta()` 
method. In the future, however, other implementations of 
`TableMetadataProvider` will be added that can return null, for example when 
the number of row groups is very large. The whole "else" branch is intended 
for such a case.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to