This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new d79360cb5f90 fix: Handle external file groups in ExternalFilePathUtil 
(#17788)
d79360cb5f90 is described below

commit d79360cb5f9030c4fc32da2bcc9c748b34602d7a
Author: Vinish Reddy <[email protected]>
AuthorDate: Tue Jan 27 03:29:19 2026 +0530

    fix: Handle external file groups in ExternalFilePathUtil (#17788)
---
 .../functional/TestExternalPathHandling.java       |  59 +++++-
 .../apache/hudi/common/model/HoodieBaseFile.java   |  35 +---
 .../table/view/AbstractTableFileSystemView.java    |   8 +-
 .../hudi/common/util/ExternalFilePathUtil.java     | 154 ++++++++++++++
 .../hudi/common/util/TestExternalFilePathUtil.java | 233 +++++++++++++++++++++
 5 files changed, 444 insertions(+), 45 deletions(-)

diff --git 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestExternalPathHandling.java
 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestExternalPathHandling.java
index 2eb39df74f01..ce94187aacbc 100644
--- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestExternalPathHandling.java
+++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestExternalPathHandling.java
@@ -113,13 +113,14 @@ public class TestExternalPathHandling extends 
HoodieClientTestBase {
 
     writeClient = getHoodieWriteClient(writeConfig);
     writeClient.setOperationType(WriteOperationType.INSERT_OVERWRITE);
+    Option<String> prefix = fileIdAndNameGenerator.getPrefix();
     String instantTime1 = 
writeClient.startCommit(HoodieTimeline.REPLACE_COMMIT_ACTION, metaClient);
     String partitionPath1 = partitions.get(0);
     Pair<String, String> fileIdAndName1 = fileIdAndNameGenerator.generate(1, 
instantTime1);
     String fileId1 = fileIdAndName1.getLeft();
     String fileName1 = fileIdAndName1.getRight();
-    String filePath1 = getPath(partitionPath1, fileName1);
-    WriteStatus writeStatus1 = createWriteStatus(instantTime1, partitionPath1, 
filePath1, fileId1);
+    String filePath1 = getPath(partitionPath1, fileName1, prefix);
+    WriteStatus writeStatus1 = createWriteStatus(instantTime1, partitionPath1, 
filePath1, fileId1, fileName1, prefix);
     JavaRDD<WriteStatus> rdd1 = 
createRdd(Collections.singletonList(writeStatus1));
     metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(
         INSTANT_GENERATOR.createNewInstant(HoodieInstant.State.REQUESTED, 
HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime1), Option.empty());
@@ -132,8 +133,8 @@ public class TestExternalPathHandling extends 
HoodieClientTestBase {
     Pair<String, String> fileIdAndName2 = fileIdAndNameGenerator.generate(2, 
instantTime2);
     String fileId2 = fileIdAndName2.getLeft();
     String fileName2 = fileIdAndName2.getRight();
-    String filePath2 = getPath(partitionPath1, fileName2);
-    WriteStatus newWriteStatus = createWriteStatus(instantTime2, 
partitionPath1, filePath2, fileId2);
+    String filePath2 = getPath(partitionPath1, fileName2, prefix);
+    WriteStatus newWriteStatus = createWriteStatus(instantTime2, 
partitionPath1, filePath2, fileId2, fileName2, prefix);
     JavaRDD<WriteStatus> rdd2 = 
createRdd(Collections.singletonList(newWriteStatus));
     Map<String, List<String>> partitionToReplacedFileIds = new HashMap<>();
     partitionToReplacedFileIds.put(partitionPath1, 
Collections.singletonList(fileId1));
@@ -149,8 +150,8 @@ public class TestExternalPathHandling extends 
HoodieClientTestBase {
     Pair<String, String> fileIdAndName3 = fileIdAndNameGenerator.generate(3, 
instantTime3);
     String fileId3 = fileIdAndName3.getLeft();
     String fileName3 = fileIdAndName3.getRight();
-    String filePath3 = getPath(partitionPath2, fileName3);
-    WriteStatus writeStatus3 = createWriteStatus(instantTime3, partitionPath2, 
filePath3, fileId3);
+    String filePath3 = getPath(partitionPath2, fileName3, prefix);
+    WriteStatus writeStatus3 = createWriteStatus(instantTime3, partitionPath2, 
filePath3, fileId3, fileName3, prefix);
     JavaRDD<WriteStatus> rdd3 = 
createRdd(Collections.singletonList(writeStatus3));
     metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(
         INSTANT_GENERATOR.createNewInstant(HoodieInstant.State.REQUESTED, 
HoodieTimeline.REPLACE_COMMIT_ACTION, instantTime3), Option.empty());
@@ -202,21 +203,50 @@ public class TestExternalPathHandling extends 
HoodieClientTestBase {
       String fileId = fileName;
       return Pair.of(fileId, fileName);
     };
+    FileIdAndNameGenerator paimonExternal = new FileIdAndNameGenerator() {
+      private static final String PREFIX = "bucket-0";
+
+      @Override
+      public Pair<String, String> generate(int iteration, String instantTime) {
+        String fileName = String.format("file_%d.parquet", iteration);
+        String fileId = PREFIX + "/" + fileName;
+        return Pair.of(fileId, fileName);
+      }
+
+      @Override
+      public Option<String> getPrefix() {
+        return Option.of(PREFIX);
+      }
+    };
     List<String> partitionedTable = Arrays.asList("americas/brazil", 
"americas/argentina");
     List<String> unpartitionedTable = Arrays.asList("", "");
-    return Stream.of(Arguments.of(external, partitionedTable), 
Arguments.of(external, unpartitionedTable));
+    return Stream.of(
+        Arguments.of(external, partitionedTable),
+        Arguments.of(external, unpartitionedTable),
+        Arguments.of(paimonExternal, partitionedTable),
+        Arguments.of(paimonExternal, unpartitionedTable));
   }
 
   private String getPath(String partitionPath, String fileName) {
+    return getPath(partitionPath, fileName, Option.empty());
+  }
+
+  private String getPath(String partitionPath, String fileName, Option<String> 
prefix) {
+    String prefixPath = prefix.map(p -> p + "/").orElse("");
     if (partitionPath.isEmpty()) {
-      return fileName;
+      return prefixPath + fileName;
     }
-    return String.format("%s/%s", partitionPath, fileName);
+    return String.format("%s/%s%s", partitionPath, prefixPath, fileName);
   }
 
   @FunctionalInterface
   private interface FileIdAndNameGenerator {
+    // Returns: Pair<fileId, fileName>
     Pair<String, String> generate(int iteration, String instantTime);
+
+    default Option<String> getPrefix() {
+      return Option.empty();
+    }
   }
 
   private void assertFileGroupCorrectness(String instantTime, String 
partitionPath, String filePath, String fileId, int expectedSize) {
@@ -271,13 +301,20 @@ public class TestExternalPathHandling extends 
HoodieClientTestBase {
     return jsc.parallelize(writeStatuses, 1);
   }
 
-  private WriteStatus createWriteStatus(String commitTime, String 
partitionPath, String filePath, String fileId) {
+  private WriteStatus createWriteStatus(String commitTime, String 
partitionPath, String filePath,
+      String fileId, String fileName, Option<String> prefix) {
     WriteStatus writeStatus = new WriteStatus();
     writeStatus.setFileId(fileId);
     writeStatus.setPartitionPath(partitionPath);
     HoodieDeltaWriteStat writeStat = new HoodieDeltaWriteStat();
     writeStat.setFileId(fileId);
-    
writeStat.setPath(ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(filePath,
 commitTime));
+    String markedFileName = prefix.isPresent()
+        ? ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(fileName, 
commitTime, prefix.get())
+        : ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(filePath, 
commitTime);
+    String fullMarkedPath = prefix.isPresent()
+        ? getPath(partitionPath, markedFileName, prefix)
+        : markedFileName;
+    writeStat.setPath(fullMarkedPath);
     writeStat.setPartitionPath(partitionPath);
     writeStat.setNumWrites(3);
     writeStat.setNumDeletes(0);
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java 
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java
index f56310782708..5fcb9eb2407a 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java
@@ -20,7 +20,6 @@ package org.apache.hudi.common.model;
 
 import org.apache.hudi.common.util.ExternalFilePathUtil;
 import org.apache.hudi.common.util.Option;
-import org.apache.hudi.storage.StoragePath;
 import org.apache.hudi.storage.StoragePathInfo;
 
 /**
@@ -78,7 +77,7 @@ public class HoodieBaseFile extends BaseFile {
 
   public HoodieBaseFile(StoragePathInfo pathInfo, String fileId, String 
commitTime,
                         BaseFile bootstrapBaseFile) {
-    super(maybeHandleExternallyGeneratedFileName(pathInfo, fileId));
+    
super(ExternalFilePathUtil.maybeHandleExternallyGeneratedFileName(pathInfo, 
fileId));
     this.bootstrapBaseFile = Option.ofNullable(bootstrapBaseFile);
     this.fileId = fileId;
     this.commitTime = commitTime;
@@ -118,37 +117,7 @@ public class HoodieBaseFile extends BaseFile {
   }
 
   private static String[] handleExternallyGeneratedFile(String fileName) {
-    String[] values = new String[2];
-    // file name has format <originalFileName>_<commitTime>_hudiext and 
originalFileName is used as fileId
-    int lastUnderscore = fileName.lastIndexOf(UNDERSCORE);
-    int secondToLastUnderscore = fileName.lastIndexOf(UNDERSCORE, 
lastUnderscore - 1);
-    values[0] = fileName.substring(0, secondToLastUnderscore);
-    values[1] = fileName.substring(secondToLastUnderscore + 1, lastUnderscore);
-    return values;
-  }
-
-  /**
-   * If the file was created externally, the original file path will have a 
'_[commitTime]_hudiext' suffix when stored in the metadata table. That suffix 
needs to be removed from the FileStatus so
-   * that the actual file can be found and read.
-   *
-   * @param pathInfo an input path info that may require updating
-   * @param fileId   the fileId for the file
-   * @return the original file status if it was not externally created, or a 
new FileStatus with the original file name if it was externally created
-   */
-  private static StoragePathInfo 
maybeHandleExternallyGeneratedFileName(StoragePathInfo pathInfo,
-                                                                        String 
fileId) {
-    if (pathInfo == null) {
-      return null;
-    }
-    if 
(ExternalFilePathUtil.isExternallyCreatedFile(pathInfo.getPath().getName())) {
-      // fileId is the same as the original file name for externally created 
files
-      StoragePath parent = pathInfo.getPath().getParent();
-      return new StoragePathInfo(
-          new StoragePath(parent, fileId), pathInfo.getLength(), 
pathInfo.isDirectory(),
-          pathInfo.getBlockReplication(), pathInfo.getBlockSize(), 
pathInfo.getModificationTime(), pathInfo.getLocations());
-    } else {
-      return pathInfo;
-    }
+    return 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
   }
 
   public String getFileId() {
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
index 36611771fe16..075a58bf7bd5 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java
@@ -38,6 +38,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.util.ClusteringUtils;
 import org.apache.hudi.common.util.CompactionUtils;
+import org.apache.hudi.common.util.ExternalFilePathUtil;
 import org.apache.hudi.common.util.HoodieTimer;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.SamplingLogger;
@@ -174,7 +175,12 @@ public abstract class AbstractTableFileSystemView 
implements SyncableFileSystemV
    */
   public List<HoodieFileGroup> addFilesToView(List<StoragePathInfo> statuses) {
     Map<String, List<StoragePathInfo>> statusesByPartitionPath = 
statuses.stream()
-        .collect(Collectors.groupingBy(fileStatus -> 
FSUtils.getRelativePartitionPath(metaClient.getBasePath(), 
fileStatus.getPath().getParent())));
+        .collect(Collectors.groupingBy(fileStatus -> {
+          String fileName = fileStatus.getPath().getName();
+          StoragePath parent = ExternalFilePathUtil.getFullPathOfPartition(
+              fileStatus.getPath().getParent(), fileName);
+          return FSUtils.getRelativePartitionPath(metaClient.getBasePath(), 
parent);
+        }));
     return statusesByPartitionPath.entrySet().stream().map(entry -> 
addFilesToView(entry.getKey(), entry.getValue()))
         .flatMap(List::stream).collect(Collectors.toList());
   }
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/util/ExternalFilePathUtil.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/util/ExternalFilePathUtil.java
index 223ae8abc42b..41a86078ee3a 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/util/ExternalFilePathUtil.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/util/ExternalFilePathUtil.java
@@ -18,12 +18,44 @@
 
 package org.apache.hudi.common.util;
 
+import org.apache.hudi.storage.StoragePath;
+import org.apache.hudi.storage.StoragePathInfo;
+
 /**
  * Utility methods for handling externally created files.
+ * <p>
+ * External files are files created outside Hudi (by external formats) that 
are later
+ * registered in a Hudi table. To distinguish these files from Hudi-generated 
files and
+ * to include commit time information, they are marked with a special suffix.
+ * <p>
+ * Hudi supports two formats for external files:
+ * <p>
+ * <b>Format 1: Without file group prefix</b>
+ * <pre>{@code <originalFileName>_<commitTime>_hudiext}</pre>
+ * Example: {@code data.parquet_20240101000000_hudiext}
+ * <ul>
+ *   <li>fileId: {@code data.parquet}</li>
+ *   <li>commitTime: {@code 20240101000000}</li>
+ * </ul>
+ * <p>
+ * <b>Format 2: With file group prefix</b>
+ * <pre>{@code 
<originalFileName>_<commitTime>_fg%3D<encodedPrefix>_hudiext}</pre>
+ * Example: {@code data.parquet_20240101000000_fg%3Dbucket-0_hudiext}
+ * <ul>
+ *   <li>fileId: {@code bucket-0/data.parquet} (includes prefix path)</li>
+ *   <li>commitTime: {@code 20240101000000}</li>
+ *   <li>prefix: {@code bucket-0} (URL-decoded from {@code _fg%3D} marker)</li>
+ * </ul>
+ * <p>
+ * The file group prefix format is used when external files are organized in 
subdirectories
+ * within a partition (e.g., bucketed files). The prefix is URL-encoded to 
avoid conflicts
+ * with special characters in directory names.
  */
 public class ExternalFilePathUtil {
   // Suffix acts as a marker when appended to a file path that the path was 
created by an external system and not a Hudi writer.
   private static final String EXTERNAL_FILE_SUFFIX = "_hudiext";
+  // Marker for file group prefix in external file names. URL-encoded "_fg=" 
to avoid conflicts with file names.
+  private static final String FILE_GROUP_PREFIX_MARKER = "_fg%3D";
 
   /**
    * Appends the commit time and external file marker to the file path. Hudi 
relies on the commit time in the file name for properly generating views of the 
files in a table.
@@ -35,6 +67,23 @@ public class ExternalFilePathUtil {
     return filePath + "_" + commitTime + EXTERNAL_FILE_SUFFIX;
   }
 
+  /**
+   * Appends the commit time, file group prefix, and external file marker to 
the file name.
+   * Use this when the external file is located in a subdirectory within the 
partition (e.g., bucket-0/file.parquet).
+   *
+   * @param fileName The original file name (without any path prefix)
+   * @param commitTime The time of the commit that added this file to the table
+   * @param externalFileGroupPrefix The prefix path where the file is located 
(e.g., "bucket-0"). Can be null or empty.
+   * @return The file name with commit time and markers appended
+   */
+  public static String appendCommitTimeAndExternalFileMarker(String fileName, 
String commitTime, String externalFileGroupPrefix) {
+    if (externalFileGroupPrefix == null || externalFileGroupPrefix.isEmpty()) {
+      return appendCommitTimeAndExternalFileMarker(fileName, commitTime);
+    }
+    String encodedPrefix = 
PartitionPathEncodeUtils.escapePathName(externalFileGroupPrefix);
+    return fileName + "_" + commitTime + FILE_GROUP_PREFIX_MARKER + 
encodedPrefix + EXTERNAL_FILE_SUFFIX;
+  }
+
   /**
    * Checks if the file name was created by an external system by checking for 
the external file marker at the end of the file name.
    * @param fileName The file name
@@ -43,4 +92,109 @@ public class ExternalFilePathUtil {
   public static boolean isExternallyCreatedFile(String fileName) {
     return fileName.endsWith(EXTERNAL_FILE_SUFFIX);
   }
+
+  /**
+   * Extracts the file group prefix from an external file name.
+   * @param fileName The external file name
+   * @return Option containing the decoded file group prefix, or empty if not 
present
+   */
+  private static Option<String> getExternalFileGroupPrefix(String fileName) {
+    if (!isExternallyCreatedFile(fileName)) {
+      return Option.empty();
+    }
+    int prefixMarkerIndex = fileName.indexOf(FILE_GROUP_PREFIX_MARKER);
+    if (prefixMarkerIndex == -1) {
+      return Option.empty();
+    }
+    int start = prefixMarkerIndex + FILE_GROUP_PREFIX_MARKER.length();
+    int end = fileName.lastIndexOf(EXTERNAL_FILE_SUFFIX);
+    return 
Option.of(PartitionPathEncodeUtils.unescapePathName(fileName.substring(start, 
end)));
+  }
+
+  /**
+   * Extracts the original file name from an external file name (without 
commit time and markers).
+   * For example, "data.parquet_123_hudiext" returns "data.parquet"
+   * And "data.parquet_123_fg%3Dbucket-0_hudiext" also returns "data.parquet"
+   *
+   * @param fileName The external file name
+   * @return The original file name
+   */
+  private static String getOriginalFileName(String fileName) {
+    if (!isExternallyCreatedFile(fileName)) {
+      return fileName;
+    }
+    int prefixMarkerIndex = fileName.indexOf(FILE_GROUP_PREFIX_MARKER);
+    int markerEnd = prefixMarkerIndex != -1
+        ? prefixMarkerIndex
+        : fileName.lastIndexOf(EXTERNAL_FILE_SUFFIX);
+    int commitTimeStart = fileName.lastIndexOf('_', markerEnd - 1);
+    return fileName.substring(0, commitTimeStart);
+  }
+
+  /**
+   * Adjusts the parent path for external files with file group prefix.
+   * For files with file group prefix, the prefix represents subdirectories 
within the partition,
+   * so we need to remove the prefix portion to get the actual partition path.
+   * Supports arbitrary nesting depths (e.g., "bucket-0/subdir1/subdir2").
+   *
+   * @param parent the parent path
+   * @param fileName the file name to check
+   * @return the adjusted parent path
+   */
+  public static StoragePath getFullPathOfPartition(StoragePath parent, String 
fileName) {
+    return getExternalFileGroupPrefix(fileName)
+        .map(prefix -> new StoragePath(parent.toString().substring(0, 
parent.toString().length() - prefix.length() - 1)))
+        .orElse(parent);
+  }
+
+  /**
+   * Parses external file names to extract fileId and commit time.
+   * Handles both formats:
+   *   - With prefix: originalName_commitTime_fg%3D<prefix>_hudiext -> fileId 
= prefix/originalName
+   *   - Without prefix: originalName_commitTime_hudiext -> fileId = 
originalName
+   *
+   * @param fileName The external file name to parse
+   * @return String array of size 2: [fileId, commitTime]
+   */
+  public static String[] parseFileIdAndCommitTimeFromExternalFile(String 
fileName) {
+    String[] values = new String[2];
+    // Extract original file name
+    String originalName = getOriginalFileName(fileName);
+    // Extract file group prefix (if present)
+    Option<String> prefix = getExternalFileGroupPrefix(fileName);
+    // Build fileId
+    values[0] = prefix.map(p -> p + "/" + originalName).orElse(originalName);
+    // Extract commitTime
+    int prefixMarkerIndex = fileName.indexOf(FILE_GROUP_PREFIX_MARKER);
+    int markerEnd = prefixMarkerIndex != -1
+        ? prefixMarkerIndex
+        : fileName.lastIndexOf(EXTERNAL_FILE_SUFFIX);
+    int commitTimeStart = fileName.lastIndexOf('_', markerEnd - 1);
+    values[1] = fileName.substring(commitTimeStart + 1, markerEnd);
+    return values;
+  }
+
+  /**
+   * If the file was created externally, the original file path will have a 
'_[commitTime]_hudiext' or '_[commitTime]_fg%3D[encodedPrefix]_hudiext' suffix when stored in the metadata table. That suffix 
needs to be removed from the StoragePathInfo so
+   * that the actual file can be found and read.
+   *
+   * @param pathInfo an input path info that may require updating
+   * @param fileId   the fileId for the file
+   * @return the original path info if it was not externally created, or a 
new StoragePathInfo whose path is the partition path joined with the fileId if it was externally created
+   */
+  public static StoragePathInfo 
maybeHandleExternallyGeneratedFileName(StoragePathInfo pathInfo,
+                                                                        String 
fileId) {
+    if (pathInfo == null) {
+      return null;
+    }
+    if (isExternallyCreatedFile(pathInfo.getPath().getName())) {
+      String fileName = pathInfo.getPath().getName();
+      StoragePath parent = 
getFullPathOfPartition(pathInfo.getPath().getParent(), fileName);
+      return new StoragePathInfo(
+          new StoragePath(parent, fileId), pathInfo.getLength(), 
pathInfo.isDirectory(),
+          pathInfo.getBlockReplication(), pathInfo.getBlockSize(), 
pathInfo.getModificationTime(), pathInfo.getLocations());
+    } else {
+      return pathInfo;
+    }
+  }
 }
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestExternalFilePathUtil.java
 
b/hudi-common/src/test/java/org/apache/hudi/common/util/TestExternalFilePathUtil.java
new file mode 100644
index 000000000000..72ce3278013d
--- /dev/null
+++ 
b/hudi-common/src/test/java/org/apache/hudi/common/util/TestExternalFilePathUtil.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.hudi.storage.StoragePath;
+import org.apache.hudi.storage.StoragePathInfo;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestExternalFilePathUtil {
+
+  private static final String COMMIT_TIME = "20240101000000";
+  private static final String EXTERNAL_FILE_SUFFIX = "_hudiext";
+
+  @Test
+  public void testAppendCommitTimeAndExternalFileMarker() {
+    String filePath = "partition1/file1.parquet";
+    String result = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(filePath, 
COMMIT_TIME);
+    assertEquals("partition1/file1.parquet_20240101000000_hudiext", result);
+  }
+
+  @Test
+  public void testAppendCommitTimeAndExternalFileMarkerWithPrefix() {
+    String fileName = "file1.parquet";
+    String prefix = "bucket-0";
+    String result = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(fileName, 
COMMIT_TIME, prefix);
+    assertEquals("file1.parquet_20240101000000_fg%3Dbucket-0_hudiext", result);
+  }
+
+  @Test
+  public void testAppendCommitTimeAndExternalFileMarkerWithNullPrefix() {
+    String fileName = "file1.parquet";
+    String result = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(fileName, 
COMMIT_TIME, null);
+    assertEquals("file1.parquet_20240101000000_hudiext", result);
+  }
+
+  @Test
+  public void testAppendCommitTimeAndExternalFileMarkerWithEmptyPrefix() {
+    String fileName = "file1.parquet";
+    String result = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(fileName, 
COMMIT_TIME, "");
+    assertEquals("file1.parquet_20240101000000_hudiext", result);
+  }
+
+  @Test
+  public void testIsExternallyCreatedFile() {
+    
assertTrue(ExternalFilePathUtil.isExternallyCreatedFile("file1.parquet_20240101000000_hudiext"));
+    
assertTrue(ExternalFilePathUtil.isExternallyCreatedFile("file1.parquet_20240101000000_fg%3Dbucket-0_hudiext"));
+    
assertFalse(ExternalFilePathUtil.isExternallyCreatedFile("file1_1-0-1_20240101000000.parquet"));
+    assertFalse(ExternalFilePathUtil.isExternallyCreatedFile("file1.parquet"));
+  }
+
+  @Test
+  public void testParseFileIdAndCommitTimeFromExternalFile_LegacyFormat() {
+    String fileName = "myfile.parquet_20240101000000_hudiext";
+    String[] result = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
+
+    assertNotNull(result);
+    assertEquals(2, result.length);
+    assertEquals("myfile.parquet", result[0]); // fileId
+    assertEquals("20240101000000", result[1]); // commitTime
+  }
+
+  @Test
+  public void testParseFileIdAndCommitTimeFromExternalFile_WithPrefix() {
+    String fileName = "data.parquet_20240101000000_fg%3Dbucket-0_hudiext";
+    String[] result = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
+
+    assertNotNull(result);
+    assertEquals(2, result.length);
+    assertEquals("bucket-0/data.parquet", result[0]); // fileId includes prefix
+    assertEquals("20240101000000", result[1]); // commitTime
+  }
+
+  @Test
+  public void 
testParseFileIdAndCommitTimeFromExternalFile_FileWithUnderscores() {
+    String fileName = "my_data_file.parquet_20240101000000_hudiext";
+    String[] result = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
+
+    assertNotNull(result);
+    assertEquals(2, result.length);
+    assertEquals("my_data_file.parquet", result[0]);
+    assertEquals("20240101000000", result[1]);
+  }
+
+  @Test
+  public void 
testParseFileIdAndCommitTimeFromExternalFile_FileWithMultipleDots() {
+    String fileName = "data.backup.parquet_20240101000000_hudiext";
+    String[] result = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
+
+    assertNotNull(result);
+    assertEquals(2, result.length);
+    assertEquals("data.backup.parquet", result[0]);
+    assertEquals("20240101000000", result[1]);
+  }
+
+  @Test
+  public void testParseFileIdAndCommitTimeFromExternalFile_NoExtension() {
+    String fileName = "datafile_20240101000000_hudiext";
+    String[] result = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(fileName);
+
+    assertNotNull(result);
+    assertEquals(2, result.length);
+    assertEquals("datafile", result[0]);
+    assertEquals("20240101000000", result[1]);
+  }
+
+  @Test
+  public void testGetFullPath_OfPartition_WithPrefix() {
+    StoragePath parent = new StoragePath("/table/partition1/bucket-0");
+    String fileName = "file1.parquet_20240101000000_fg%3Dbucket-0_hudiext";
+
+    StoragePath result = ExternalFilePathUtil.getFullPathOfPartition(parent, 
fileName);
+    assertEquals(new StoragePath("/table/partition1"), result);
+  }
+
+  @Test
+  public void testGetFullPath_OfPartition_WithNestedPrefix_2Levels() {
+    StoragePath parent = new StoragePath("/table/partition1/bucket-0/subdir");
+    String fileName = 
"file1.parquet_20240101000000_fg%3Dbucket-0%2Fsubdir_hudiext";
+
+    StoragePath result = ExternalFilePathUtil.getFullPathOfPartition(parent, 
fileName);
+    assertEquals(new StoragePath("/table/partition1"), result);
+  }
+
+  @Test
+  public void testGetFullPath_OfPartition_WithNestedPrefix_3Levels() {
+    StoragePath parent = new 
StoragePath("/table/partition1/bucket-0/subdir1/subdir2");
+    String fileName = 
"file1.parquet_20240101000000_fg%3Dbucket-0%2Fsubdir1%2Fsubdir2_hudiext";
+
+    StoragePath result = ExternalFilePathUtil.getFullPathOfPartition(parent, 
fileName);
+    assertEquals(new StoragePath("/table/partition1"), result);
+  }
+
+  @Test
+  public void testGetFullPath_OfPartition_WithoutPrefix() {
+    StoragePath parent = new StoragePath("/table/partition1");
+    String fileName = "file1.parquet_20240101000000_hudiext";
+
+    StoragePath result = ExternalFilePathUtil.getFullPathOfPartition(parent, 
fileName);
+    assertEquals(new StoragePath("/table/partition1"), result);
+  }
+
+  @Test
+  public void testMaybeHandleExternallyGeneratedFileName_NullPathInfo() {
+    StoragePathInfo result = 
ExternalFilePathUtil.maybeHandleExternallyGeneratedFileName(null, "fileId");
+    assertNull(result);
+  }
+
+  @Test
+  public void testMaybeHandleExternallyGeneratedFileName_NonExternalFile() {
+    StoragePath path = new 
StoragePath("/table/partition1/file_1-0-1_20240101000000.parquet");
+    StoragePathInfo pathInfo = new StoragePathInfo(path, 1000, false, (short) 
1, 128 * 1024 * 1024, 0);
+
+    StoragePathInfo result = 
ExternalFilePathUtil.maybeHandleExternallyGeneratedFileName(pathInfo, "fileId");
+    assertEquals(pathInfo, result);
+  }
+
+  @Test
+  public void testMaybeHandleExternallyGeneratedFileName_ExternalFileLegacy() {
+    StoragePath originalPath = new 
StoragePath("/table/partition1/file1.parquet_20240101000000_hudiext");
+    StoragePathInfo pathInfo = new StoragePathInfo(originalPath, 1000, false, 
(short) 1, 128 * 1024 * 1024, 12345);
+    String fileId = "file1.parquet";
+
+    StoragePathInfo result = 
ExternalFilePathUtil.maybeHandleExternallyGeneratedFileName(pathInfo, fileId);
+
+    assertNotNull(result);
+    assertEquals(new StoragePath("/table/partition1/file1.parquet"), 
result.getPath());
+    assertEquals(1000, result.getLength());
+    assertEquals(12345, result.getModificationTime());
+  }
+
+  @Test
+  public void 
testMaybeHandleExternallyGeneratedFileName_ExternalFileWithPrefix() {
+    StoragePath originalPath = new 
StoragePath("/table/partition1/bucket-0/file1.parquet_20240101000000_fg%3Dbucket-0_hudiext");
+    StoragePathInfo pathInfo = new StoragePathInfo(originalPath, 1000, false, 
(short) 1, 128 * 1024 * 1024, 12345);
+    String fileId = "bucket-0/file1.parquet";
+
+    StoragePathInfo result = 
ExternalFilePathUtil.maybeHandleExternallyGeneratedFileName(pathInfo, fileId);
+
+    assertNotNull(result);
+    // The file group prefix (bucket-0) is stripped from the parent to get the partition path, and the fileId (which includes the prefix) is resolved against it, 
resulting in /table/partition1/bucket-0/file1.parquet
+    assertEquals(new StoragePath("/table/partition1/bucket-0/file1.parquet"), 
result.getPath());
+    assertEquals(1000, result.getLength());
+    assertEquals(12345, result.getModificationTime());
+  }
+
+  @Test
+  public void testRoundTrip_LegacyFormat() {
+    String originalFile = "mydata.parquet";
+    String withMarker = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(originalFile, 
COMMIT_TIME);
+
+    assertTrue(ExternalFilePathUtil.isExternallyCreatedFile(withMarker));
+
+    String[] parsed = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(withMarker);
+    assertEquals(originalFile, parsed[0]);
+    assertEquals(COMMIT_TIME, parsed[1]);
+  }
+
+  @Test
+  public void testRoundTrip_WithPrefix() {
+    String originalFile = "mydata.parquet";
+    String prefix = "bucket-0";
+    String withMarker = 
ExternalFilePathUtil.appendCommitTimeAndExternalFileMarker(originalFile, 
COMMIT_TIME, prefix);
+
+    assertTrue(ExternalFilePathUtil.isExternallyCreatedFile(withMarker));
+
+    String[] parsed = 
ExternalFilePathUtil.parseFileIdAndCommitTimeFromExternalFile(withMarker);
+    assertEquals(prefix + "/" + originalFile, parsed[0]);
+    assertEquals(COMMIT_TIME, parsed[1]);
+  }
+}

Reply via email to