This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 372fcd8784f [HUDI-6254] Allow using absolute path in ManifestFileWriter (#8787)
372fcd8784f is described below
commit 372fcd8784f7af65adad27047735ea24f4d50128
Author: Jinpeng <[email protected]>
AuthorDate: Mon May 29 03:13:33 2023 -0700
[HUDI-6254] Allow using absolute path in ManifestFileWriter (#8787)
Co-authored-by: jp0317 <[email protected]>
---
.../apache/hudi/gcp/bigquery/BigQuerySyncTool.java | 2 +-
.../hudi/sync/common/util/ManifestFileWriter.java | 8 +++----
.../sync/common/util/TestManifestFileWriter.java | 25 +++++++++++++++++++---
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
index dee7ab406bd..042fbee210b 100644
--- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
+++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
@@ -96,7 +96,7 @@ public class BigQuerySyncTool extends HoodieSyncTool {
.setUseFileListingFromMetadata(config.getBoolean(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA))
.setAssumeDatePartitioning(config.getBoolean(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING))
.build();
- manifestFileWriter.writeManifestFile();
+ manifestFileWriter.writeManifestFile(false);
if (!bqSyncClient.tableExists(manifestTableName)) {
bqSyncClient.createManifestTable(manifestTableName,
manifestFileWriter.getManifestSourceUri());
diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
index 481d1ca781a..5203654d035 100644
--- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
+++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
@@ -59,9 +59,9 @@ public class ManifestFileWriter {
/**
* Write all the latest base file names to the manifest file.
*/
- public synchronized void writeManifestFile() {
+ public synchronized void writeManifestFile(boolean useAbsolutePath) {
try {
- List<String> baseFiles =
fetchLatestBaseFilesForAllPartitions(metaClient, useFileListingFromMetadata,
assumeDatePartitioning)
+ List<String> baseFiles =
fetchLatestBaseFilesForAllPartitions(metaClient, useFileListingFromMetadata,
assumeDatePartitioning, useAbsolutePath)
.collect(Collectors.toList());
if (baseFiles.isEmpty()) {
LOG.warn("No base file to generate manifest file.");
@@ -83,7 +83,7 @@ public class ManifestFileWriter {
}
public static Stream<String>
fetchLatestBaseFilesForAllPartitions(HoodieTableMetaClient metaClient,
- boolean useFileListingFromMetadata, boolean assumeDatePartitioning) {
+ boolean useFileListingFromMetadata, boolean assumeDatePartitioning,
boolean useAbsolutePath) {
try {
List<String> partitions = FSUtils.getAllPartitionPaths(new
HoodieLocalEngineContext(metaClient.getHadoopConf()),
metaClient.getBasePath(), useFileListingFromMetadata,
assumeDatePartitioning);
@@ -94,7 +94,7 @@ public class ManifestFileWriter {
HoodieMetadataFileSystemView fsView = new
HoodieMetadataFileSystemView(engContext, metaClient,
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(),
HoodieMetadataConfig.newBuilder().enable(useFileListingFromMetadata).withAssumeDatePartitioning(assumeDatePartitioning).build());
- return fsView.getLatestBaseFiles(p).map(HoodieBaseFile::getFileName);
+ return fsView.getLatestBaseFiles(p).map(useAbsolutePath ?
HoodieBaseFile::getPath : HoodieBaseFile::getFileName);
});
} catch (Exception e) {
throw new HoodieException("Error in fetching latest base files.", e);
diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
index 47b60f72325..c1dffc8e4de 100644
--- a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
+++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
@@ -30,10 +30,13 @@ import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.InputStream;
import java.util.stream.IntStream;
+import java.util.List;
import static
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS;
import static
org.apache.hudi.sync.common.util.ManifestFileWriter.fetchLatestBaseFilesForAllPartitions;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
public class TestManifestFileWriter extends HoodieCommonTestHarness {
@@ -47,7 +50,7 @@ public class TestManifestFileWriter extends HoodieCommonTestHarness {
// Generate 10 files under each partition
createTestDataForPartitionedTable(metaClient, 10);
ManifestFileWriter manifestFileWriter =
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
- assertEquals(30, fetchLatestBaseFilesForAllPartitions(metaClient, false,
false).count());
+ assertEquals(30, fetchLatestBaseFilesForAllPartitions(metaClient, false,
false, false).count());
}
@Test
@@ -55,10 +58,26 @@ public class TestManifestFileWriter extends HoodieCommonTestHarness {
// Generate 10 files under each partition
createTestDataForPartitionedTable(metaClient, 3);
ManifestFileWriter manifestFileWriter =
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
- manifestFileWriter.writeManifestFile();
+ manifestFileWriter.writeManifestFile(false);
Path manifestFilePath = manifestFileWriter.getManifestFilePath();
try (InputStream is = metaClient.getFs().open(manifestFilePath)) {
- assertEquals(9, FileIOUtils.readAsUTFStringLines(is).size(), "there
should be 9 base files in total; 3 per partition.");
+ List<String> expectedLines = FileIOUtils.readAsUTFStringLines(is);
+ assertEquals(9, expectedLines.size(), "there should be 9 base files in
total; 3 per partition.");
+ expectedLines.forEach(line -> assertFalse(line.contains(basePath)));
+ }
+ }
+
+ @Test
+ public void testCreateManifestFileWithAbsolutePath() throws Exception {
+ // Generate 10 files under each partition
+ createTestDataForPartitionedTable(metaClient, 3);
+ ManifestFileWriter manifestFileWriter =
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
+ manifestFileWriter.writeManifestFile(true);
+ Path manifestFilePath = manifestFileWriter.getManifestFilePath();
+ try (InputStream is = metaClient.getFs().open(manifestFilePath)) {
+ List<String> expectedLines = FileIOUtils.readAsUTFStringLines(is);
+ assertEquals(9, expectedLines.size(), "there should be 9 base files in
total; 3 per partition.");
+ expectedLines.forEach(line ->
assertTrue(line.startsWith(metaClient.getFs().getScheme() + ":" + basePath)));
}
}