aokolnychyi commented on code in PR #7582:
URL: https://github.com/apache/iceberg/pull/7582#discussion_r1276879397
##########
core/src/main/java/org/apache/iceberg/PositionDeletesTable.java:
##########
@@ -155,39 +167,87 @@ protected List<String> scanColumns() {
return context().returnColumnStats() ? DELETE_SCAN_WITH_STATS_COLUMNS :
DELETE_SCAN_COLUMNS;
}
+ /**
+ * Sets a filter that applies on base table of this position deletes
table, to use for this
+ * scan.
+ *
+ * <p>Only the partition expressions part of the filter will be applied to
the position deletes
+ * table, as the schema of the base table does not otherwise match the
schema of position
+ * deletes table.
+ *
+ * <ul>
+ * <li>Only the partition expressions of the filter that can be
projected on the base table
+ * partition specs, via {@link
+ *
org.apache.iceberg.expressions.Projections.ProjectionEvaluator#project(Expression)}
+ * will be evaluated. Note, not all partition expressions can be
projected.
+ * <li>Because it cannot apply beyond the partition expression, this
filter will not
+ * contribute to the residuals of tasks returned by this scan. (See
{@link
+ * PositionDeletesScanTask#residual()})
+ * </ul>
+ *
+ * @param expr expression filter that applies on the base table of this
posiiton deletes table
+ * @return this for method chaining
+ */
+ public BatchScan baseTableFilter(Expression expr) {
+ return new PositionDeletesBatchScan(
+ table(), schema(), context(), Expressions.and(baseTableFilter,
expr));
+ }
+
+ /**
+ * Sets a filter to use for this scan.
+ *
+ * <p>This must be set exclusively from {@link
#baseTableFilter(Expression)}
+ *
+ * @param expr expression filter
+ * @return this for method chaining
+ */
+ @Override
+ public BatchScan filter(Expression expr) {
+ return new PositionDeletesBatchScan(
+ table(),
+ schema(),
+ context().filterRows(Expressions.and(context().rowFilter(), expr)),
+ baseTableFilter);
+ }
+
@Override
protected CloseableIterable<ScanTask> doPlanFiles() {
String schemaString = SchemaParser.toJson(tableSchema());
// prepare transformed partition specs and caches
Map<Integer, PartitionSpec> transformedSpecs =
transformSpecs(tableSchema(), table().specs());
+ LoadingCache<Integer, String> specStringCache =
+ partitionCacheOf(transformedSpecs, PartitionSpecParser::toJson);
+ LoadingCache<Integer, ManifestEvaluator> deletesTableEvalCache =
+ partitionCacheOf(
+ transformedSpecs,
+ spec -> ManifestEvaluator.forRowFilter(filter(), spec,
isCaseSensitive()));
+ LoadingCache<Integer, ManifestEvaluator> baseTableEvalCache =
+ partitionCacheOf(
+ table().specs(), // evaluate base table filters on base table
specs
+ spec -> ManifestEvaluator.forRowFilter(baseTableFilter, spec,
isCaseSensitive()));
LoadingCache<Integer, ResidualEvaluator> residualCache =
partitionCacheOf(
transformedSpecs,
spec ->
ResidualEvaluator.of(
spec,
+ // there are no applicable filters in the base table's
filter
+ // that we can use to evaluate on the position deletes
table
shouldIgnoreResiduals() ? Expressions.alwaysTrue() :
filter(),
isCaseSensitive()));
- LoadingCache<Integer, String> specStringCache =
- partitionCacheOf(transformedSpecs, PartitionSpecParser::toJson);
-
- LoadingCache<Integer, ManifestEvaluator> evalCache =
- partitionCacheOf(
- transformedSpecs,
- spec -> ManifestEvaluator.forRowFilter(filter(), spec,
isCaseSensitive()));
-
// iterate through delete manifests
List<ManifestFile> manifests = snapshot().deleteManifests(table().io());
CloseableIterable<ManifestFile> matchingManifests =
Review Comment:
Shall we do this filter only if either of the filter expression is
non-trivial? Otherwise, what's the point of doing this work?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]