flyrain commented on code in PR #4683:
URL: https://github.com/apache/iceberg/pull/4683#discussion_r879944993
##########
core/src/main/java/org/apache/iceberg/deletes/Deletes.java:
##########
@@ -137,93 +146,81 @@ protected boolean shouldKeep(T row) {
}
}
- private static class PositionSetDeleteFilter<T> extends Filter<T> {
- private final Function<T, Long> rowToPosition;
- private final PositionDeleteIndex deleteSet;
-
- private PositionSetDeleteFilter(Function<T, Long> rowToPosition,
PositionDeleteIndex deleteSet) {
- this.rowToPosition = rowToPosition;
- this.deleteSet = deleteSet;
- }
-
- @Override
- protected boolean shouldKeep(T row) {
- return !deleteSet.isDeleted(rowToPosition.apply(row));
- }
- }
-
private static class PositionStreamDeleteFilter<T> extends CloseableGroup
implements CloseableIterable<T> {
private final CloseableIterable<T> rows;
+ private final CloseableIterator<Long> deletePosIterator;
private final Function<T, Long> extractPos;
- private final CloseableIterable<Long> deletePositions;
+ private long nextDeletePos;
private PositionStreamDeleteFilter(CloseableIterable<T> rows, Function<T,
Long> extractPos,
CloseableIterable<Long>
deletePositions) {
this.rows = rows;
this.extractPos = extractPos;
- this.deletePositions = deletePositions;
+ this.deletePosIterator = deletePositions.iterator();
}
@Override
public CloseableIterator<T> iterator() {
- CloseableIterator<Long> deletePosIterator = deletePositions.iterator();
-
CloseableIterator<T> iter;
if (deletePosIterator.hasNext()) {
- iter = new PositionFilterIterator(rows.iterator(), deletePosIterator);
+ nextDeletePos = deletePosIterator.next();
+ iter = createPosDeleteIterator(rows.iterator());
} else {
iter = rows.iterator();
- try {
- deletePosIterator.close();
- } catch (IOException e) {
- throw new UncheckedIOException("Failed to close delete positions
iterator", e);
- }
}
addCloseable(iter);
+ addCloseable(deletePosIterator);
return iter;
}
- private class PositionFilterIterator extends FilterIterator<T> {
- private final CloseableIterator<Long> deletePosIterator;
- private long nextDeletePos;
+ boolean isDeleted(T row) {
+ long currentPos = extractPos.apply(row);
+ if (currentPos < nextDeletePos) {
+ return false;
+ }
- protected PositionFilterIterator(CloseableIterator<T> items,
CloseableIterator<Long> deletePositions) {
- super(items);
- this.deletePosIterator = deletePositions;
+ // consume delete positions until the next is past the current position
+ boolean isDeleted = currentPos == nextDeletePos;
+ while (deletePosIterator.hasNext() && nextDeletePos <= currentPos) {
this.nextDeletePos = deletePosIterator.next();
+ if (!isDeleted && currentPos == nextDeletePos) {
+ // if any delete position matches the current position
+ isDeleted = true;
+ }
Review Comment:
This logic isn't necessary, and I'm not sure why it was there in the first
place. The only way to enter the while loop is when there are duplicate
position deletes. For example, suppose the deleted positions are 0L, 0L, 1L,
3L, 3L. Nothing needs to be set in that case, because line 185 already checks
it. For more context: both the row positions and the position deletes are
sorted. See testPositionStreamRowFilterWithDuplicates for details.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]