aokolnychyi commented on code in PR #4847:
URL: https://github.com/apache/iceberg/pull/4847#discussion_r904527353
##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
return ImmutableList.of(this); // don't split
}
}
+
+ static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile
manifest, long referencedSnapshotId) {
+ return StaticDataTask.Row.of(
+ manifest.content().id(),
+ manifest.path(),
+ manifest.length(),
+ manifest.partitionSpecId(),
+ manifest.snapshotId(),
+ manifest.content() == ManifestContent.DATA ?
manifest.addedFilesCount() : 0,
+ manifest.content() == ManifestContent.DATA ?
manifest.existingFilesCount() : 0,
+ manifest.content() == ManifestContent.DATA ?
manifest.deletedFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.addedFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.existingFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.deletedFilesCount() : 0,
+ ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+ referencedSnapshotId
+ );
+ }
+
+ private static class SnapshotEvaluator extends
BoundExpressionVisitor<Boolean> {
+
+ private Expression boundExpr;
+ private Long snapshotId;
+
+ private static final boolean ROWS_MIGHT_MATCH = true;
+ private static final boolean ROWS_CANNOT_MATCH = false;
+
+ private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType
structType, boolean caseSensitive) {
+ this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+ this.snapshotId = snap.snapshotId();
+ }
+
+ private boolean eval() {
+ return ExpressionVisitors.visitEvaluator(boundExpr, this);
+ }
+
+ @Override
+ public Boolean alwaysTrue() {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public Boolean alwaysFalse() {
+ return ROWS_CANNOT_MATCH;
+ }
+
+ @Override
+ public Boolean not(Boolean result) {
+ return !result;
+ }
+
+ @Override
+ public Boolean and(Boolean leftResult, Boolean rightResult) {
+ return leftResult && rightResult;
+ }
+
+ @Override
+ public Boolean or(Boolean leftResult, Boolean rightResult) {
+ return leftResult || rightResult;
+ }
+
+ @Override
+ public <T> Boolean isNull(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notNull(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean isNaN(BoundReference<T> ref) {
+ if (snapshotIdRef(ref)) {
+ return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+ } else {
+ return ROWS_MIGHT_MATCH;
+ }
+ }
+
+ @Override
+ public <T> Boolean notNaN(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult < 0);
+ }
+
+ @Override
+ public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult <= 0);
+ }
+
+ @Override
+ public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult > 0);
+ }
+
+ @Override
+ public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult >= 0);
+ }
+
+ @Override
+ public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult == 0);
+ }
+
+ @Override
+ public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult != 0);
+ }
+
+ @Override
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+ Comparator<Object> longComparator =
Comparators.forType(Types.LongType.get());
+ if (snapshotIdRef(ref)) {
+ boolean in = literalSet.stream().anyMatch(lit ->
longComparator.compare(snapshotId, lit) == 0);
+ if (!in) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+ Comparator<Object> longComparator =
Comparators.forType(Types.LongType.get());
+ if (snapshotIdRef(ref)) {
+ boolean notIn = literalSet.stream().noneMatch(lit ->
longComparator.compare(snapshotId, lit) == 0);
+ if (!notIn) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ /**
+ * Comparison of snapshot reference and literal
+ * @param ref bound reference, comparison attempted only if reference is
for reference_snapshot_id
+ * @param lit literal value to compare with snapshot id.
+ * @param compareResult function to apply to comparison result, which
returns true if rows might match
+ * @return boolean true if rows might match, false if cannot match
+ */
+ private <T> Boolean compare(BoundReference<T> ref,
+ Literal<T> lit,
+ Function<Integer, Boolean> compareResult) {
+
+ if (snapshotIdRef(ref)) {
+ Literal<Long> longLit = lit.to(Types.LongType.get());
+ int cmp = longLit.comparator().compare(snapshotId, longLit.value());
+ if (!compareResult.apply(cmp)) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+ return ROWS_MIGHT_MATCH;
+ }
+
+ private <T> boolean snapshotIdRef(BoundReference<T> ref) {
Review Comment:
nit: Will it be a bit more readable if we added `is` to the method name? I
know we don't always do that but I feel like it would make sense. What do you
think?
##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
return ImmutableList.of(this); // don't split
}
}
+
+ static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile
manifest, long referencedSnapshotId) {
+ return StaticDataTask.Row.of(
+ manifest.content().id(),
+ manifest.path(),
+ manifest.length(),
+ manifest.partitionSpecId(),
+ manifest.snapshotId(),
+ manifest.content() == ManifestContent.DATA ?
manifest.addedFilesCount() : 0,
+ manifest.content() == ManifestContent.DATA ?
manifest.existingFilesCount() : 0,
+ manifest.content() == ManifestContent.DATA ?
manifest.deletedFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.addedFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.existingFilesCount() : 0,
+ manifest.content() == ManifestContent.DELETES ?
manifest.deletedFilesCount() : 0,
+ ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+ referencedSnapshotId
+ );
+ }
+
+ private static class SnapshotEvaluator extends
BoundExpressionVisitor<Boolean> {
+
+ private Expression boundExpr;
+ private Long snapshotId;
+
+ private static final boolean ROWS_MIGHT_MATCH = true;
+ private static final boolean ROWS_CANNOT_MATCH = false;
+
+ private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType
structType, boolean caseSensitive) {
+ this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+ this.snapshotId = snap.snapshotId();
+ }
+
+ private boolean eval() {
+ return ExpressionVisitors.visitEvaluator(boundExpr, this);
+ }
+
+ @Override
+ public Boolean alwaysTrue() {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public Boolean alwaysFalse() {
+ return ROWS_CANNOT_MATCH;
+ }
+
+ @Override
+ public Boolean not(Boolean result) {
+ return !result;
+ }
+
+ @Override
+ public Boolean and(Boolean leftResult, Boolean rightResult) {
+ return leftResult && rightResult;
+ }
+
+ @Override
+ public Boolean or(Boolean leftResult, Boolean rightResult) {
+ return leftResult || rightResult;
+ }
+
+ @Override
+ public <T> Boolean isNull(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notNull(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean isNaN(BoundReference<T> ref) {
+ if (snapshotIdRef(ref)) {
+ return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+ } else {
+ return ROWS_MIGHT_MATCH;
+ }
+ }
+
+ @Override
+ public <T> Boolean notNaN(BoundReference<T> ref) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult < 0);
+ }
+
+ @Override
+ public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult <= 0);
+ }
+
+ @Override
+ public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult > 0);
+ }
+
+ @Override
+ public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult >= 0);
+ }
+
+ @Override
+ public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult == 0);
+ }
+
+ @Override
+ public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+ return compare(ref, lit, compareResult -> compareResult != 0);
+ }
+
+ @Override
+ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+ Comparator<Object> longComparator =
Comparators.forType(Types.LongType.get());
+ if (snapshotIdRef(ref)) {
+ boolean in = literalSet.stream().anyMatch(lit ->
longComparator.compare(snapshotId, lit) == 0);
+ if (!in) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+ Comparator<Object> longComparator =
Comparators.forType(Types.LongType.get());
+ if (snapshotIdRef(ref)) {
+ boolean notIn = literalSet.stream().noneMatch(lit ->
longComparator.compare(snapshotId, lit) == 0);
+ if (!notIn) {
+ return ROWS_CANNOT_MATCH;
+ }
+ }
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ @Override
+ public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
+ return ROWS_MIGHT_MATCH;
+ }
+
+ /**
+ * Comparison of snapshot reference and literal
+ * @param ref bound reference, comparison attempted only if reference is
for reference_snapshot_id
+ * @param lit literal value to compare with snapshot id.
+ * @param compareResult function to apply to comparison result, which
returns true if rows might match
+ * @return boolean true if rows might match, false if cannot match
+ */
+ private <T> Boolean compare(BoundReference<T> ref,
+ Literal<T> lit,
+ Function<Integer, Boolean> compareResult) {
Review Comment:
nit: Is there a better name than compareResult for this function?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]