joyhaldar commented on code in PR #15038:
URL: https://github.com/apache/iceberg/pull/15038#discussion_r2683081745


##########
core/src/main/java/org/apache/iceberg/RemoveSnapshots.java:
##########
@@ -296,53 +301,51 @@ private Map<String, SnapshotRef> 
computeRetainedRefs(Map<String, SnapshotRef> re
     return retainedRefs;
   }
 
-  private Set<Long> computeAllBranchSnapshotsToRetain(Collection<SnapshotRef> 
refs) {
-    Set<Long> branchSnapshotsToRetain = Sets.newHashSet();
+  private void computeAllBranchSnapshotsToRetain(
+      Collection<SnapshotRef> refs, Set<Long> idsToRetain, Set<Long> 
referencedSnapshotIds) {
     for (SnapshotRef ref : refs) {
       if (ref.isBranch()) {
         long expireSnapshotsOlderThan =
             ref.maxSnapshotAgeMs() != null ? now - ref.maxSnapshotAgeMs() : 
defaultExpireOlderThan;
         int minSnapshotsToKeep =
             ref.minSnapshotsToKeep() != null ? ref.minSnapshotsToKeep() : 
defaultMinNumSnapshots;
-        branchSnapshotsToRetain.addAll(
-            computeBranchSnapshotsToRetain(
-                ref.snapshotId(), expireSnapshotsOlderThan, 
minSnapshotsToKeep));
-      }
-    }
-
-    return branchSnapshotsToRetain;
-  }
-
-  private Set<Long> computeBranchSnapshotsToRetain(
-      long snapshot, long expireSnapshotsOlderThan, int minSnapshotsToKeep) {
-    Set<Long> idsToRetain = Sets.newHashSet();
-    for (Snapshot ancestor : SnapshotUtil.ancestorsOf(snapshot, 
base::snapshot)) {
-      if (idsToRetain.size() < minSnapshotsToKeep
-          || ancestor.timestampMillis() >= expireSnapshotsOlderThan) {
-        idsToRetain.add(ancestor.snapshotId());
+        computeBranchSnapshotsToRetain(
+            ref.snapshotId(),
+            expireSnapshotsOlderThan,
+            minSnapshotsToKeep,
+            idsToRetain,
+            referencedSnapshotIds);
       } else {
-        return idsToRetain;
+        referencedSnapshotIds.add(ref.snapshotId());
       }
     }
-
-    return idsToRetain;
   }
 
-  private Set<Long> unreferencedSnapshotsToRetain(Collection<SnapshotRef> 
refs) {
-    Set<Long> referencedSnapshots = Sets.newHashSet();
-    for (SnapshotRef ref : refs) {
-      if (ref.isBranch()) {
-        for (Snapshot snapshot : SnapshotUtil.ancestorsOf(ref.snapshotId(), 
base::snapshot)) {
-          referencedSnapshots.add(snapshot.snapshotId());
+  private void computeBranchSnapshotsToRetain(
+      long snapshot,
+      long expireSnapshotsOlderThan,
+      int minSnapshotsToKeep,
+      Set<Long> idsToRetain,
+      Set<Long> referencedSnapshotIds) {
+    boolean stillRetaining = true;
+
+    for (Snapshot ancestor : SnapshotUtil.ancestorsOf(snapshot, 
base::snapshot)) {
+      referencedSnapshotIds.add(ancestor.snapshotId());
+      if (stillRetaining) {
+        if (idsToRetain.size() < minSnapshotsToKeep
+            || ancestor.timestampMillis() >= expireSnapshotsOlderThan) {
+          idsToRetain.add(ancestor.snapshotId());
+        } else {
+          stillRetaining = false;

Review Comment:
   Thank you for your review.
   
   The original code returned early here, but `unreferencedSnapshotsToRetain` 
would walk the full chain again anyway. So the early exit wasn't saving any 
work.
   
   Now we walk once and collect both retained and referenced snapshots together.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to