This is an automated email from the ASF dual-hosted git repository.
amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 99843f03ef Core: Expired Snapshot files in a transaction should be
deleted. (#9183)
99843f03ef is described below
commit 99843f03efecc3f5ef7d3e1d32aae5eff22cb315
Author: Andrew Sherman <[email protected]>
AuthorDate: Mon Dec 4 14:46:37 2023 -0800
Core: Expired Snapshot files in a transaction should be deleted. (#9183)
When a snapshot is expired as part of a transaction, the manifest list
file(s) should be deleted when the transaction commits. A recent change
(#6634) ensured that these files are not deleted when they have also
been committed as part of a transaction, but this breaks the simple
case where no new files are committed. Fix this by not skipping
deletion when the list of committed files is empty.
TESTING:
Extended a unit test to ensure that manifest list files are deleted.
Ran the test without the fix on a branch where #6634 was reverted
to show that this is a regression.
---
.../java/org/apache/iceberg/BaseTransaction.java | 24 +++++++++-------------
.../java/org/apache/iceberg/TableTestBase.java | 9 ++++++++
.../org/apache/iceberg/TestRemoveSnapshots.java | 7 ++++++-
.../iceberg/TestSequenceNumberForV2Table.java | 8 ++++++++
4 files changed, 33 insertions(+), 15 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/BaseTransaction.java
b/core/src/main/java/org/apache/iceberg/BaseTransaction.java
index 32cf695c8b..018f70eb16 100644
--- a/core/src/main/java/org/apache/iceberg/BaseTransaction.java
+++ b/core/src/main/java/org/apache/iceberg/BaseTransaction.java
@@ -446,20 +446,16 @@ public class BaseTransaction implements Transaction {
}
Set<String> committedFiles = committedFiles(ops, newSnapshots);
- if (committedFiles != null) {
- // delete all of the files that were deleted in the most recent set of
operation commits
- Tasks.foreach(deletedFiles)
- .suppressFailureWhenFinished()
- .onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted
file: {}", file, exc))
- .run(
- path -> {
- if (!committedFiles.contains(path)) {
- ops.io().deleteFile(path);
- }
- });
- } else {
- LOG.warn("Failed to load metadata for a committed snapshot, skipping
clean-up");
- }
+ // delete all of the files that were deleted in the most recent set of
operation commits
+ Tasks.foreach(deletedFiles)
+ .suppressFailureWhenFinished()
+ .onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted
file: {}", file, exc))
+ .run(
+ path -> {
+ if (committedFiles == null || !committedFiles.contains(path)) {
+ ops.io().deleteFile(path);
+ }
+ });
} catch (RuntimeException e) {
LOG.warn("Failed to load committed metadata, skipping clean-up", e);
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java
b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index 68ce055289..c3db859101 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -212,6 +212,15 @@ public class TableTestBase {
&&
Files.getFileExtension(name).equalsIgnoreCase("avro")));
}
+ List<File> listManifestLists(String tableDirToList) {
+ return Lists.newArrayList(
+ new File(tableDirToList, "metadata")
+ .listFiles(
+ (dir, name) ->
+ name.startsWith("snap")
+ &&
Files.getFileExtension(name).equalsIgnoreCase("avro")));
+ }
+
public static long countAllMetadataFiles(File tableDir) {
return Arrays.stream(new File(tableDir, "metadata").listFiles())
.filter(f -> f.isFile())
diff --git a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
index 71455c5712..fc3aa6c916 100644
--- a/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
+++ b/core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java
@@ -440,7 +440,10 @@ public class TestRemoveSnapshots extends TableTestBase {
t3 = System.currentTimeMillis();
}
- // Retain last 2 snapshots
+ Assert.assertEquals(
+ "Should be 3 manifest lists", 3,
listManifestLists(table.location()).size());
+
+ // Retain last 2 snapshots, which means 1 is deleted.
Transaction tx = table.newTransaction();
removeSnapshots(tx.table()).expireOlderThan(t3).retainLast(2).commit();
tx.commitTransaction();
@@ -449,6 +452,8 @@ public class TestRemoveSnapshots extends TableTestBase {
"Should have two snapshots.", 2,
Lists.newArrayList(table.snapshots()).size());
Assert.assertEquals(
"First snapshot should not present.", null,
table.snapshot(firstSnapshotId));
+ Assert.assertEquals(
+ "Should be 2 manifest lists", 2,
listManifestLists(table.location()).size());
}
@Test
diff --git
a/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
b/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
index 08bdc64a08..86842b6812 100644
--- a/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
+++ b/core/src/test/java/org/apache/iceberg/TestSequenceNumberForV2Table.java
@@ -309,6 +309,8 @@ public class TestSequenceNumberForV2Table extends
TableTestBase {
V2Assert.assertEquals("Snapshot sequence number should be 1", 1,
snap1.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 1", 1,
readMetadata().lastSequenceNumber());
+ V2Assert.assertEquals(
+ "Should be 1 manifest list", 1,
listManifestLists(table.location()).size());
table.newAppend().appendFile(FILE_B).commit();
Snapshot snap2 = table.currentSnapshot();
@@ -319,12 +321,18 @@ public class TestSequenceNumberForV2Table extends
TableTestBase {
V2Assert.assertEquals("Snapshot sequence number should be 2", 2,
snap2.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 2", 2,
readMetadata().lastSequenceNumber());
+ V2Assert.assertEquals(
+ "Should be 2 manifest lists", 2,
listManifestLists(table.location()).size());
Transaction txn = table.newTransaction();
txn.expireSnapshots().expireSnapshotId(commitId1).commit();
txn.commitTransaction();
V2Assert.assertEquals(
"Last sequence number should be 2", 2,
readMetadata().lastSequenceNumber());
+ V2Assert.assertEquals(
+ "Should be 1 manifest list as 1 was deleted",
+ 1,
+ listManifestLists(table.location()).size());
}
@Test