This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch branch-0.x in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 3ef0b7a5d955af8f322e0cb0bdfde21559a96ef4
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri Apr 26 09:33:01 2024 -0700

    [HUDI-7676] Fix serialization in Spark DAG in HoodieBackedTableMetadataWriter (#11103)
---
 .../org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index 8970640c6ee..5da20c9f5d6 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -68,6 +68,7 @@ import org.apache.hudi.exception.HoodieIndexException;
 import org.apache.hudi.exception.HoodieMetadataException;
 import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.storage.HoodieStorage;
+import org.apache.hudi.storage.HoodieStorageUtils;
 import org.apache.hudi.storage.StoragePath;
 import org.apache.hudi.storage.StoragePathInfo;
 import org.apache.hudi.table.BulkInsertPartitioner;
@@ -592,6 +593,7 @@ public abstract class HoodieBackedTableMetadataWriter<I> implements HoodieTableM
 
     List<DirectoryInfo> partitionsToBootstrap = new LinkedList<>();
     final int fileListingParallelism = metadataWriteConfig.getFileListingParallelism();
+    SerializableConfiguration conf = new SerializableConfiguration(dataMetaClient.getHadoopConf());
     final String dirFilterRegex = dataWriteConfig.getMetadataConfig().getDirectoryFilterRegex();
     final String datasetBasePath = dataMetaClient.getBasePathV2().toString();
     StoragePath storageBasePath = new StoragePath(datasetBasePath);
@@ -602,8 +604,9 @@ public abstract class HoodieBackedTableMetadataWriter<I> implements HoodieTableM
       // List all directories in parallel
       engineContext.setJobStatus(this.getClass().getSimpleName(), "Listing " + numDirsToList + " partitions from filesystem");
       List<DirectoryInfo> processedDirectories = engineContext.map(pathsToList.subList(0, numDirsToList), path -> {
+        HoodieStorage storage = HoodieStorageUtils.getStorage(path, conf.get());
         String relativeDirPath = FSUtils.getRelativePartitionPath(storageBasePath, path);
-        return new DirectoryInfo(relativeDirPath, metadataMetaClient.getStorage().listDirectEntries(path), initializationTime);
+        return new DirectoryInfo(relativeDirPath, storage.listDirectEntries(path), initializationTime);
       }, numDirsToList);
 
       pathsToList = new LinkedList<>(pathsToList.subList(numDirsToList, pathsToList.size()));
