This is an automated email from the ASF dual-hosted git repository.

amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 99843f03ef Core: Expired Snapshot files in a transaction should be 
deleted. (#9183)
99843f03ef is described below

commit 99843f03efecc3f5ef7d3e1d32aae5eff22cb315
Author: Andrew Sherman <[email protected]>
AuthorDate: Mon Dec 4 14:46:37 2023 -0800

    Core: Expired Snapshot files in a transaction should be deleted. (#9183)
    
    When a snapshot is expired as part of a transaction, the manifest list
    file(s) should be deleted when the transaction commits. A recent change
    (#6634) ensured that these files are not deleted when they have also
    been committed as part of a transaction, but this breaks the simple
    case where no new files are committed. Fix this by not skipping
    deletion when the list of committed files is empty.
    
    TESTING:
    
    Extended a unit test to ensure that manifest list files are deleted.
    Ran the test without the fix on a branch where #6634 was reverted
    to show that this is a regression.
---
 .../java/org/apache/iceberg/BaseTransaction.java   | 24 +++++++++-------------
 .../java/org/apache/iceberg/TableTestBase.java     |  9 ++++++++
 .../org/apache/iceberg/TestRemoveSnapshots.java    |  7 ++++++-
 .../iceberg/TestSequenceNumberForV2Table.java      |  8 ++++++++
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/BaseTransaction.java 
b/core/src/main/java/org/apache/iceberg/BaseTransaction.java
index 32cf695c8b..018f70eb16 100644
--- a/core/src/main/java/org/apache/iceberg/BaseTransaction.java
+++ b/core/src/main/java/org/apache/iceberg/BaseTransaction.java
@@ -446,20 +446,16 @@ public class BaseTransaction implements Transaction {
       }
 
       Set<String> committedFiles = committedFiles(ops, newSnapshots);
-      if (committedFiles != null) {
-        // delete all of the files that were deleted in the most recent set of 
operation commits
-        Tasks.foreach(deletedFiles)
-            .suppressFailureWhenFinished()
-            .onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted 
file: {}", file, exc))
-            .run(
-                path -> {
-                  if (!committedFiles.contains(path)) {
-                    ops.io().deleteFile(path);
-                  }
-                });
-      } else {
-        LOG.warn("Failed to load metadata for a committed snapshot, skipping 
clean-up");
-      }
+      // delete all of the files that were deleted in the most recent set of 
operation commits
+      Tasks.foreach(deletedFiles)
+          .suppressFailureWhenFinished()
+          .onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted 
file: {}", file, exc))
+          .run(
+              path -> {
+                if (committedFiles == null || !committedFiles.contains(path)) {
+                  ops.io().deleteFile(path);
+                }
+              });
 
     } catch (RuntimeException e) {
       LOG.warn("Failed to load committed metadata, skipping clean-up", e);
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java 
b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index 68ce055289..c3db859101 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -212,6 +212,15 @@ public class TableTestBase {
                         && 
Files.getFileExtension(name).equalsIgnoreCase("avro")));
   }
 
+  List<File> listManifestLists(String tableDirToList) {
+    return Lists.newArrayList(
+        new File(tableDirToList, "metadata")
+            .listFiles(
+                (dir, name) ->
+                    name.startsWith("snap")
+                        && 
Files.getFileExtension(name).equalsIgnoreCase("avro")));
+  }
+
   public static long countAllMetadataFiles(File tableDir) {
     return Arrays.stream(new File(tableDir, "metadata").listFiles())
         .filter(f -> f.isFile())
diff --git a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java 
b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
index 71455c5712..fc3aa6c916 100644
--- a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
+++ b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
@@ -440,7 +440,10 @@ public class TestRemoveSnapshots extends TableTestBase {
       t3 = System.currentTimeMillis();
     }
 
-    // Retain last 2 snapshots
+    Assert.assertEquals(
+        "Should be 3 manifest lists", 3, 
listManifestLists(table.location()).size());
+
+    // Retain last 2 snapshots, which means 1 is deleted.
     Transaction tx = table.newTransaction();
     removeSnapshots(tx.table()).expireOlderThan(t3).retainLast(2).commit();
     tx.commitTransaction();
@@ -449,6 +452,8 @@ public class TestRemoveSnapshots extends TableTestBase {
         "Should have two snapshots.", 2, 
Lists.newArrayList(table.snapshots()).size());
     Assert.assertEquals(
         "First snapshot should not present.", null, 
table.snapshot(firstSnapshotId));
+    Assert.assertEquals(
+        "Should be 2 manifest lists", 2, 
listManifestLists(table.location()).size());
   }
 
   @Test
diff --git 
a/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java 
b/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
index 08bdc64a08..86842b6812 100644
--- a/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
+++ b/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
@@ -309,6 +309,8 @@ public class TestSequenceNumberForV2Table extends 
TableTestBase {
     V2Assert.assertEquals("Snapshot sequence number should be 1", 1, 
snap1.sequenceNumber());
     V2Assert.assertEquals(
         "Last sequence number should be 1", 1, 
readMetadata().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Should be 1 manifest list", 1, 
listManifestLists(table.location()).size());
 
     table.newAppend().appendFile(FILE_B).commit();
     Snapshot snap2 = table.currentSnapshot();
@@ -319,12 +321,18 @@ public class TestSequenceNumberForV2Table extends 
TableTestBase {
     V2Assert.assertEquals("Snapshot sequence number should be 2", 2, 
snap2.sequenceNumber());
     V2Assert.assertEquals(
         "Last sequence number should be 2", 2, 
readMetadata().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Should be 2 manifest lists", 2, 
listManifestLists(table.location()).size());
 
     Transaction txn = table.newTransaction();
     txn.expireSnapshots().expireSnapshotId(commitId1).commit();
     txn.commitTransaction();
     V2Assert.assertEquals(
         "Last sequence number should be 2", 2, 
readMetadata().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Should be 1 manifest list as 1 was deleted",
+        1,
+        listManifestLists(table.location()).size());
   }
 
   @Test

Reply via email to