This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 48d71266418 [MINOR] Make bootstrap operator invoke MDT's getAllPartitions method (#8806)
48d71266418 is described below
commit 48d71266418c10c36f695fbe7c7b6dafce0ffb77
Author: flashJd <[email protected]>
AuthorDate: Tue May 30 17:09:35 2023 +0800
[MINOR] Make bootstrap operator invoke MDT's getAllPartitions method (#8806)
---
.../java/org/apache/hudi/common/fs/FSUtils.java | 27 ----------------------
.../hudi/sink/bootstrap/BootstrapOperator.java | 4 +++-
.../java/org/apache/hudi/source/FileIndex.java | 2 +-
3 files changed, 4 insertions(+), 29 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index a7a32ae527e..fcb83f9cffb 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -24,7 +24,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.inline.InLineFileSystem;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
@@ -246,32 +245,6 @@ public class FSUtils {
: fullPartitionPathStr.substring(partitionStartIndex + basePath.getName().length() + 1);
}
- /**
- * Obtain all the partition paths, that are present in this table, denoted by presence of
- * {@link HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE_PREFIX}.
- *
- * If the basePathStr is a subdirectory of .hoodie folder then we assume that the partitions of an internal
- * table (a hoodie table within the .hoodie directory) are to be obtained.
- *
- * @param fs FileSystem instance
- * @param basePathStr base directory
- */
- public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException {
- // If the basePathStr is a folder within the .hoodie directory then we are listing partitions within an
- // internal table.
- final boolean isMetadataTable = HoodieTableMetadata.isMetadataTable(basePathStr);
- final Path basePath = new Path(basePathStr);
- final List<String> partitions = new ArrayList<>();
- processFiles(fs, basePathStr, (locatedFileStatus) -> {
- Path filePath = locatedFileStatus.getPath();
- if (filePath.getName().startsWith(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE_PREFIX)) {
- partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
- }
- return true;
- }, !isMetadataTable);
- return partitions;
- }
-
/**
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
* are skipped
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
index 00fb49ee0c9..7c9daf4075d 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
@@ -18,6 +18,7 @@
package org.apache.hudi.sink.bootstrap;
+import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieAvroRecord;
@@ -68,6 +69,7 @@ import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import static java.util.stream.Collectors.toList;
+import static org.apache.hudi.source.FileIndex.metadataConfig;
import static org.apache.hudi.util.StreamerUtil.isValidFile;
/**
@@ -140,7 +142,7 @@ public class BootstrapOperator<I, O extends HoodieRecord<?>>
String basePath = hoodieTable.getMetaClient().getBasePath();
int taskID = getRuntimeContext().getIndexOfThisSubtask();
LOG.info("Start loading records in table {} into the index state, taskId = {}", basePath, taskID);
- for (String partitionPath : FSUtils.getAllFoldersWithPartitionMetaFile(FSUtils.getFs(basePath, hadoopConf), basePath)) {
+ for (String partitionPath : FSUtils.getAllPartitionPaths(new HoodieFlinkEngineContext(hadoopConf), metadataConfig(conf), basePath)) {
if (pattern.matcher(partitionPath).matches()) {
loadRecords(partitionPath);
}
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
index 87f27d2247a..2ddf10ef171 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java
@@ -285,7 +285,7 @@ public class FileIndex {
return this.partitionPaths;
}
- private static HoodieMetadataConfig metadataConfig(org.apache.flink.configuration.Configuration conf) {
+ public static HoodieMetadataConfig metadataConfig(org.apache.flink.configuration.Configuration conf) {
Properties properties = new Properties();
// set up metadata.enabled=true in table DDL to enable metadata listing