amogh-jahagirdar commented on code in PR #4578:
URL: https://github.com/apache/iceberg/pull/4578#discussion_r853341526
##########
core/src/main/java/org/apache/iceberg/RemoveSnapshots.java:
##########
@@ -163,21 +180,86 @@ private TableMetadata internalApply() {
this.base = ops.refresh();
Set<Long> idsToRetain = Sets.newHashSet();
- List<Long> ancestorIds = SnapshotUtil.ancestorIds(base.currentSnapshot(),
base::snapshot);
- if (minNumSnapshots >= ancestorIds.size()) {
+ // Identify refs that should be removed
+ Set<String> refsToRemove = Sets.newHashSet();
+ for (String name : base.refs().keySet()) {
+ SnapshotRef ref = base.ref(name);
+ if (!name.equals(SnapshotRef.MAIN_BRANCH) && ref.maxRefAgeMs() != null) {
+ Snapshot snapshot = base.snapshot(ref.snapshotId());
+ if (removalTimeReference - snapshot.timestampMillis() >=
ref.maxRefAgeMs()) {
+ refsToRemove.add(name);
+ }
+
+ }
+
+ if (!refsToRemove.contains(name) &&
!idsToRemove.contains(ref.snapshotId())) {
+ idsToRetain.add(ref.snapshotId());
+ }
+
+ }
+ List<Map.Entry<String, SnapshotRef>> branches = base
+ .refs()
+ .entrySet()
+ .stream()
+ .filter(refEntry -> refEntry.getValue().isBranch())
+ .collect(Collectors.toList());
+
+ for (Map.Entry<String, SnapshotRef> branchEntry : branches) {
+ String name = branchEntry.getKey();
+ SnapshotRef branch = branchEntry.getValue();
+ Long expireSnapshotsOlderThan = null;
+ Integer minSnapshotsToKeep = null;
+ if (name.equals(SnapshotRef.MAIN_BRANCH)) {
+ expireSnapshotsOlderThan = this.expireOlderThan;
+ minSnapshotsToKeep = this.minNumSnapshots;
+ } else if (branch.maxSnapshotAgeMs() != null) {
+ expireSnapshotsOlderThan = removalTimeReference -
branch.maxSnapshotAgeMs();
+ }
+
+ if (branch.minSnapshotsToKeep() != null) {
+ minSnapshotsToKeep = branch.minSnapshotsToKeep();
+ }
+
+ Set<Long> branchSnapshotsToRetain =
+ computeBranchSnapshotsToRetain(branch, expireSnapshotsOlderThan,
minSnapshotsToKeep);
+
+ idsToRetain.addAll(branchSnapshotsToRetain);
+ }
+
+ TableMetadata.Builder updatedMetaBuilder = TableMetadata.buildFrom(base);
+ idsToRemove.addAll(base.snapshots().stream().map(Snapshot::snapshotId)
+ .filter(snapshot ->
!idsToRetain.contains(snapshot)).collect(Collectors.toSet()));
+ updatedMetaBuilder.removeSnapshots(idsToRemove);
+ refsToRemove.forEach(updatedMetaBuilder::removeRef);
+ TableMetadata updated = updatedMetaBuilder.build();
+ List<Snapshot> updateSnapshots = updated.snapshots();
+ List<Snapshot> baseSnapshots = base.snapshots();
+ return updateSnapshots.size() != baseSnapshots.size() ? updated : base;
+ }
+
+ private Set<Long> computeBranchSnapshotsToRetain(SnapshotRef branch, Long
expireSnapshotsOlderThan,
+ Integer minSnapshotsToKeep) {
+
+ Snapshot snapshot = base.snapshot(branch.snapshotId());
+ List<Long> ancestorIds = SnapshotUtil.ancestorIds(snapshot,
base::snapshot);
+ Set<Long> idsToRetain = Sets.newHashSet();
+ // Everything on this branch should be retained
+ if (expireSnapshotsOlderThan == null || minSnapshotsToKeep == null ||
minSnapshotsToKeep >= ancestorIds.size()) {
Review Comment:
> The condition would evaluate to true if we have expireSnapshotsOlderThan
!= null and minSnapshotsToKeep >= ancestorIds.size()
That is correct!
> In that case, can some of the ancestorIds have ancestor.timestampMillis()
< expireOlderThan? If yes, should we retain them?
Yes we would be retaining them even if some of the ancestors are older than
the threshold. I believe this should be the desired behavior because a user
explicitly set this number of minimum number of snapshots to retain we would
respect that (since retention is inclusive of what the user set).
Let me know your thoughts here!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]