This is an automated email from the ASF dual-hosted git repository.
jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git
The following commit(s) were added to refs/heads/master by this push:
new 57a8af2eb [AMORO-3608] Enhance RollingFileCleaner to preserve full URI for remote filesystems (#3841)
57a8af2eb is described below
commit 57a8af2eb73cdc431c7d8319891099b39c1f0b0f
Author: Xu Bai <[email protected]>
AuthorDate: Mon Oct 27 11:41:46 2025 +0800
[AMORO-3608] Enhance RollingFileCleaner to preserve full URI for remote filesystems (#3841)
* [WAP] Enhance RollingFileCleaner to preserve full URI for remote file systems
* Use TableFileUtil for parent directory extraction and handle NotFoundException in AuthenticatedFileIOAdapter
---
.../amoro/server/utils/RollingFileCleaner.java | 2 +-
.../amoro/server/util/TestRollingFileCleaner.java | 2 +-
.../amoro/io/AuthenticatedFileIOAdapter.java | 12 ++++--
.../java/org/apache/amoro/utils/TestFileUtil.java | 46 ++++++++++++++++++++++
4 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java b/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
index 74a977a7a..821bf6661 100644
--- a/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
+++ b/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
@@ -59,7 +59,7 @@ public class RollingFileCleaner {
}
collectedFiles.add(filePath);
- String parentDir = new Path(URI.create(filePath).getPath()).getParent().toString();
+ String parentDir = TableFileUtil.getParent(filePath);
parentDirectories.add(parentDir);
int currentCount = fileCounter.incrementAndGet();
diff --git a/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java b/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
index 3bc562bc1..5931f72f3 100644
--- a/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
+++ b/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
@@ -38,7 +38,7 @@ public class TestRollingFileCleaner {
// generate some files
Set<String> expiredFiles = Sets.newHashSet();
for (int i = 0; i < 5050; i++) {
- String filePath = "file_" + i + ".txt";
+ String filePath = "file://bucket/warehouse/date=2025-01-01/file_" + i + ".txt";
io.addFile(filePath, ("file_content" + i).getBytes());
expiredFiles.add(filePath);
fileCleaner.addFile(filePath);
diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
index 80073f475..5885a5311 100644
--- a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
+++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
@@ -19,6 +19,7 @@
package org.apache.amoro.io;
import org.apache.amoro.shade.guava32.com.google.common.base.Preconditions;
+import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
@@ -53,10 +54,15 @@ public class AuthenticatedFileIOAdapter implements AuthenticatedFileIO {
@Override
public boolean exists(String path) {
- if (io instanceof AuthenticatedFileIO) {
- return ((AuthenticatedFileIO) io).exists(path);
+ try {
+ if (io instanceof AuthenticatedFileIO) {
+ return ((AuthenticatedFileIO) io).exists(path);
+ }
+
+ return AuthenticatedFileIO.super.exists(path);
+ } catch (NotFoundException e) {
+ return false;
}
- return AuthenticatedFileIO.super.exists(path);
}
@Override
diff --git a/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java b/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
index e4bd2c0e7..a466e6612 100644
--- a/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
+++ b/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
@@ -36,6 +36,7 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
+import java.net.URI;
import java.util.Iterator;
import java.util.Objects;
import java.util.Set;
@@ -201,4 +202,49 @@ public class TestFileUtil {
Assert.assertTrue(io.exists(metadataLocation));
}
}
+
+ @Test
+ public void testGetParent() {
+ String filePath =
+ "hdfs://easyops-sloth/user/warehouse/animal_partition_two/base/"
+ + "opt_mon=202109/opt_day=26/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+ String parentDir = TableFileUtil.getParent(filePath);
+ Assert.assertEquals(
+ "hdfs://easyops-sloth/user/warehouse/animal_partition_two/base/"
+ + "opt_mon=202109/opt_day=26",
+ parentDir);
+
+ // test s3 scheme
+ filePath =
+ "s3://my-bucket/user/warehouse/animal_partition_two/base/"
+ + "opt_mon=202109/opt_day=26/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+ parentDir = TableFileUtil.getParent(filePath);
+ Assert.assertEquals(
+ "s3://my-bucket/user/warehouse/animal_partition_two/base/" + "opt_mon=202109/opt_day=26",
+ parentDir);
+
+ // test wrapped by URI
+ URI uri = URI.create(filePath);
+ parentDir = TableFileUtil.getParent(uri.toString());
+ Assert.assertEquals(
+ "s3://my-bucket/user/warehouse/animal_partition_two/base/" + "opt_mon=202109/opt_day=26",
+ parentDir);
+ // lose scheme when getting path from URI
+ parentDir = TableFileUtil.getParent(uri.getPath());
+ Assert.assertEquals(
+ "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26", parentDir);
+
+ // test no scheme
+ filePath =
+ "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26/"
+ + "00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+ parentDir = TableFileUtil.getParent(filePath);
+ Assert.assertEquals(
+ "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26", parentDir);
+
+ // test root path
+ filePath = "/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+ parentDir = TableFileUtil.getParent(filePath);
+ Assert.assertEquals("/", parentDir);
+ }
}