This is an automated email from the ASF dual-hosted git repository.

codope pushed a commit to branch release-0.12.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit cdf2657420cd967dc84332767c5f93c8fa905809
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri Jul 29 19:25:57 2022 -0700

    [HUDI-4507] Improve file name extraction logic in metadata utils (#6250)
---
 .../java/org/apache/hudi/common/fs/FSUtils.java     | 18 ++++++++++++++++++
 .../hudi/metadata/HoodieTableMetadataUtil.java      | 21 ++++-----------------
 .../java/org/apache/hudi/common/fs/TestFSUtils.java | 12 ++++++++++--
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index cfc143e3d0..d940f3bb45 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -615,6 +615,24 @@ public class FSUtils {
     return StringUtils.isNullOrEmpty(partitionPath) ? basePath : new 
Path(basePath, partitionPath);
   }
 
+  /**
+   * Extracts the file name from the relative path based on the table base path.  For example:
+   * "/2022/07/29/file1.parquet", "/2022/07/29" -> "file1.parquet"
+   * "2022/07/29/file2.parquet", "2022/07/29" -> "file2.parquet"
+   * "/file3.parquet", "" -> "file3.parquet"
+   * "file4.parquet", "" -> "file4.parquet"
+   *
+   * @param filePathWithPartition the relative file path based on the table base path.
+   * @param partition             the relative partition path.  For partitioned table, `partition` contains the relative partition path;
+   *                              for non-partitioned table, `partition` is empty
+   * @return Extracted file name in String.
+   */
+  public static String getFileName(String filePathWithPartition, String 
partition) {
+    int offset = StringUtils.isNullOrEmpty(partition)
+        ? (filePathWithPartition.startsWith("/") ? 1 : 0) : partition.length() 
+ 1;
+    return filePathWithPartition.substring(offset);
+  }
+
   /**
    * Get DFS full partition path (e.g. hdfs://ip-address:8020:/<absolute path>)
    */
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index d41f09990e..2c5b8db0ed 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -325,16 +325,13 @@ public class HoodieTableMetadataUtil {
                           return map;
                         }
 
-                        int offset = partition.equals(NON_PARTITIONED_NAME)
-                            ? (pathWithPartition.startsWith("/") ? 1 : 0)
-                            : partition.length() + 1;
-                        String filename = pathWithPartition.substring(offset);
+                        String fileName = 
FSUtils.getFileName(pathWithPartition, partitionStatName);
 
                         // Since write-stats are coming in no particular order, if the same
                         // file have previously been appended to w/in the txn, we simply pick max
                         // of the sizes as reported after every write, since file-sizes are
                         // monotonically increasing (ie file-size never goes down, unless deleted)
-                        map.merge(filename, stat.getFileSizeInBytes(), 
Math::max);
+                        map.merge(fileName, stat.getFileSizeInBytes(), 
Math::max);
 
                         return map;
                       },
@@ -410,12 +407,7 @@ public class HoodieTableMetadataUtil {
         return Collections.emptyListIterator();
       }
 
-      // For partitioned table, "partition" contains the relative partition path;
-      // for non-partitioned table, "partition" is empty
-      int offset = StringUtils.isNullOrEmpty(partition)
-          ? (pathWithPartition.startsWith("/") ? 1 : 0) : partition.length() + 
1;
-
-      final String fileName = pathWithPartition.substring(offset);
+      String fileName = FSUtils.getFileName(pathWithPartition, partition);
       if (!FSUtils.isBaseFile(new Path(fileName))) {
         return Collections.emptyListIterator();
       }
@@ -1162,13 +1154,8 @@ public class HoodieTableMetadataUtil {
                                                             
HoodieTableMetaClient datasetMetaClient,
                                                             List<String> 
columnsToIndex,
                                                             boolean isDeleted) 
{
-    String partitionName = getPartitionIdentifier(partitionPath);
-    // NOTE: We have to chop leading "/" to make sure Hadoop does not treat it like
-    //       absolute path
     String filePartitionPath = filePath.startsWith("/") ? 
filePath.substring(1) : filePath;
-    String fileName = partitionName.equals(NON_PARTITIONED_NAME)
-        ? filePartitionPath
-        : filePartitionPath.substring(partitionName.length() + 1);
+    String fileName = FSUtils.getFileName(filePath, partitionPath);
 
     if (isDeleted) {
       // TODO we should delete records instead of stubbing them
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
index 7506e659c9..481bb1dd45 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
@@ -326,9 +326,17 @@ public class TestFSUtils extends HoodieCommonTestHarness {
     Files.createFile(partitionPath.resolve(log3));
 
     assertEquals(3, (int) FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, 
new Configuration()),
-            new Path(partitionPath.toString()), fileId, LOG_EXTENTION, 
instantTime).get().getLeft());
+        new Path(partitionPath.toString()), fileId, LOG_EXTENTION, 
instantTime).get().getLeft());
     assertEquals(4, FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new 
Configuration()),
-            new Path(partitionPath.toString()), fileId, LOG_EXTENTION, 
instantTime));
+        new Path(partitionPath.toString()), fileId, LOG_EXTENTION, 
instantTime));
+  }
+
+  @Test
+  public void testGetFilename() {
+    assertEquals("file1.parquet", 
FSUtils.getFileName("/2022/07/29/file1.parquet", "/2022/07/29"));
+    assertEquals("file2.parquet", 
FSUtils.getFileName("2022/07/29/file2.parquet", "2022/07/29"));
+    assertEquals("file3.parquet", FSUtils.getFileName("/file3.parquet", ""));
+    assertEquals("file4.parquet", FSUtils.getFileName("file4.parquet", ""));
   }
 
   private void prepareTestDirectory(FileSystem fileSystem, String rootDir) 
throws IOException {

Reply via email to