This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 48d71266418 [MINOR] Make bootstrap operator invoke MDT's getAllPartitions method (#8806)
48d71266418 is described below

commit 48d71266418c10c36f695fbe7c7b6dafce0ffb77
Author: flashJd <[email protected]>
AuthorDate: Tue May 30 17:09:35 2023 +0800

    [MINOR] Make bootstrap operator invoke MDT's getAllPartitions method (#8806)
---
 .../java/org/apache/hudi/common/fs/FSUtils.java    | 27 ----------------------
 .../hudi/sink/bootstrap/BootstrapOperator.java     |  4 +++-
 .../java/org/apache/hudi/source/FileIndex.java     |  2 +-
 3 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index a7a32ae527e..fcb83f9cffb 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.fs.inline.InLineFileSystem;
 import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
@@ -246,32 +245,6 @@ public class FSUtils {
         : fullPartitionPathStr.substring(partitionStartIndex + basePath.getName().length() + 1);
   }
 
-  /**
-   * Obtain all the partition paths, that are present in this table, denoted by presence of
-   * {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE_PREFIX}.
-   *
-   * If the basePathStr is a subdirectory of .hoodie folder then we assume that the partitions of an internal
-   * table (a hoodie table within the .hoodie directory) are to be obtained.
-   *
-   * @param fs FileSystem instance
-   * @param basePathStr base directory
-   */
-  public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException {
-    // If the basePathStr is a folder within the .hoodie directory then we are listing partitions within an
-    // internal table.
-    final boolean isMetadataTable = HoodieTableMetadata.isMetadataTable(basePathStr);
-    final Path basePath = new Path(basePathStr);
-    final List<String> partitions = new ArrayList<>();
-    processFiles(fs, basePathStr, (locatedFileStatus) -> {
-      Path filePath = locatedFileStatus.getPath();
-      if (filePath.getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) {
-        partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
-      }
-      return true;
-    }, !isMetadataTable);
-    return partitions;
-  }
-
   /**
    * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
    * are skipped
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
index 00fb49ee0c9..7c9daf4075d 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.sink.bootstrap;
 
+import org.apache.hudi.client.common.HoodieFlinkEngineContext;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieAvroRecord;
@@ -68,6 +69,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 
 import static java.util.stream.Collectors.toList;
+import static org.apache.hudi.source.FileIndex.metadataConfig;
 import static org.apache.hudi.util.StreamerUtil.isValidFile;
 
 /**
@@ -140,7 +142,7 @@ public class BootstrapOperator<I, O extends HoodieRecord<?>>
     String basePath = hoodieTable.getMetaClient().getBasePath();
     int taskID = getRuntimeContext().getIndexOfThisSubtask();
     LOG.info("Start loading records in table {} into the index state, taskId = {}", basePath, taskID);
-    for (String partitionPath : FSUtils.getAllFoldersWithPartitionMetaFile(FSUtils.getFs(basePath, hadoopConf), basePath)) {
+    for (String partitionPath : FSUtils.getAllPartitionPaths(new HoodieFlinkEngineContext(hadoopConf), metadataConfig(conf), basePath)) {
       if (pattern.matcher(partitionPath).matches()) {
         loadRecords(partitionPath);
       }
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
index 87f27d2247a..2ddf10ef171 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
@@ -285,7 +285,7 @@ public class FileIndex {
     return this.partitionPaths;
   }
 
-  private static HoodieMetadataConfig metadataConfig(org.apache.flink.configuration.Configuration conf) {
+  public static HoodieMetadataConfig metadataConfig(org.apache.flink.configuration.Configuration conf) {
     Properties properties = new Properties();
 
     // set up metadata.enabled=true in table DDL to enable metadata listing

Reply via email to