This is an automated email from the ASF dual-hosted git repository.

jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git


The following commit(s) were added to refs/heads/master by this push:
     new 57a8af2eb [AMORO-3608] Enhance RollingFileCleaner to preserve full URI for remote filesystems (#3841)
57a8af2eb is described below

commit 57a8af2eb73cdc431c7d8319891099b39c1f0b0f
Author: Xu Bai <[email protected]>
AuthorDate: Mon Oct 27 11:41:46 2025 +0800

    [AMORO-3608] Enhance RollingFileCleaner to preserve full URI for remote filesystems (#3841)
    
    * [WAP] Enhance RollingFileCleaner to preserve full URI for remote file systems
    
    * Use TableFileUtil for parent directory extraction and handle NotFoundException in AuthenticatedFileIOAdapter
---
 .../amoro/server/utils/RollingFileCleaner.java     |  2 +-
 .../amoro/server/util/TestRollingFileCleaner.java  |  2 +-
 .../amoro/io/AuthenticatedFileIOAdapter.java       | 12 ++++--
 .../java/org/apache/amoro/utils/TestFileUtil.java  | 46 ++++++++++++++++++++++
 4 files changed, 57 insertions(+), 5 deletions(-)

diff --git 
a/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java 
b/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
index 74a977a7a..821bf6661 100644
--- 
a/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
+++ 
b/amoro-ams/src/main/java/org/apache/amoro/server/utils/RollingFileCleaner.java
@@ -59,7 +59,7 @@ public class RollingFileCleaner {
     }
 
     collectedFiles.add(filePath);
-    String parentDir = new 
Path(URI.create(filePath).getPath()).getParent().toString();
+    String parentDir = TableFileUtil.getParent(filePath);
     parentDirectories.add(parentDir);
     int currentCount = fileCounter.incrementAndGet();
 
diff --git 
a/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
 
b/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
index 3bc562bc1..5931f72f3 100644
--- 
a/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
+++ 
b/amoro-ams/src/test/java/org/apache/amoro/server/util/TestRollingFileCleaner.java
@@ -38,7 +38,7 @@ public class TestRollingFileCleaner {
     // generate some files
     Set<String> expiredFiles = Sets.newHashSet();
     for (int i = 0; i < 5050; i++) {
-      String filePath = "file_" + i + ".txt";
+      String filePath = "file://bucket/warehouse/date=2025-01-01/file_" + i + 
".txt";
       io.addFile(filePath, ("file_content" + i).getBytes());
       expiredFiles.add(filePath);
       fileCleaner.addFile(filePath);
diff --git 
a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
 
b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
index 80073f475..5885a5311 100644
--- 
a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
+++ 
b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/AuthenticatedFileIOAdapter.java
@@ -19,6 +19,7 @@
 package org.apache.amoro.io;
 
 import org.apache.amoro.shade.guava32.com.google.common.base.Preconditions;
+import org.apache.iceberg.exceptions.NotFoundException;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.io.OutputFile;
@@ -53,10 +54,15 @@ public class AuthenticatedFileIOAdapter implements 
AuthenticatedFileIO {
 
   @Override
   public boolean exists(String path) {
-    if (io instanceof AuthenticatedFileIO) {
-      return ((AuthenticatedFileIO) io).exists(path);
+    try {
+      if (io instanceof AuthenticatedFileIO) {
+        return ((AuthenticatedFileIO) io).exists(path);
+      }
+
+      return AuthenticatedFileIO.super.exists(path);
+    } catch (NotFoundException e) {
+      return false;
     }
-    return AuthenticatedFileIO.super.exists(path);
   }
 
   @Override
diff --git 
a/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java 
b/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
index e4bd2c0e7..a466e6612 100644
--- 
a/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
+++ 
b/amoro-format-iceberg/src/test/java/org/apache/amoro/utils/TestFileUtil.java
@@ -36,6 +36,7 @@ import org.junit.rules.TemporaryFolder;
 import java.io.File;
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.net.URI;
 import java.util.Iterator;
 import java.util.Objects;
 import java.util.Set;
@@ -201,4 +202,49 @@ public class TestFileUtil {
       Assert.assertTrue(io.exists(metadataLocation));
     }
   }
+
+  @Test
+  public void testGetParent() {
+    String filePath =
+        "hdfs://easyops-sloth/user/warehouse/animal_partition_two/base/"
+            + 
"opt_mon=202109/opt_day=26/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+    String parentDir = TableFileUtil.getParent(filePath);
+    Assert.assertEquals(
+        "hdfs://easyops-sloth/user/warehouse/animal_partition_two/base/"
+            + "opt_mon=202109/opt_day=26",
+        parentDir);
+
+    // test s3 scheme
+    filePath =
+        "s3://my-bucket/user/warehouse/animal_partition_two/base/"
+            + 
"opt_mon=202109/opt_day=26/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+    parentDir = TableFileUtil.getParent(filePath);
+    Assert.assertEquals(
+        "s3://my-bucket/user/warehouse/animal_partition_two/base/" + 
"opt_mon=202109/opt_day=26",
+        parentDir);
+
+    // test wrapped by URI
+    URI uri = URI.create(filePath);
+    parentDir = TableFileUtil.getParent(uri.toString());
+    Assert.assertEquals(
+        "s3://my-bucket/user/warehouse/animal_partition_two/base/" + 
"opt_mon=202109/opt_day=26",
+        parentDir);
+    // lose scheme when getting path from URI
+    parentDir = TableFileUtil.getParent(uri.getPath());
+    Assert.assertEquals(
+        "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26", 
parentDir);
+
+    // test no scheme
+    filePath =
+        "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26/"
+            + 
"00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+    parentDir = TableFileUtil.getParent(filePath);
+    Assert.assertEquals(
+        "/user/warehouse/animal_partition_two/base/opt_mon=202109/opt_day=26", 
parentDir);
+
+    // test root path
+    filePath = 
"/00000-0-3-1-37128f07-0845-43d8-905b-bd69b4ca351c-0000000001.parquet";
+    parentDir = TableFileUtil.getParent(filePath);
+    Assert.assertEquals("/", parentDir);
+  }
 }

Reply via email to