This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 372fcd8784f [HUDI-6254] Allow using absolute path in ManifestFileWriter (#8787)
372fcd8784f is described below

commit 372fcd8784f7af65adad27047735ea24f4d50128
Author: Jinpeng <[email protected]>
AuthorDate: Mon May 29 03:13:33 2023 -0700

    [HUDI-6254] Allow using absolute path in ManifestFileWriter (#8787)
    
    Co-authored-by: jp0317 <[email protected]>
---
 .../apache/hudi/gcp/bigquery/BigQuerySyncTool.java |  2 +-
 .../hudi/sync/common/util/ManifestFileWriter.java  |  8 +++----
 .../sync/common/util/TestManifestFileWriter.java   | 25 +++++++++++++++++++---
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git 
a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java 
b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
index dee7ab406bd..042fbee210b 100644
--- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
+++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java
@@ -96,7 +96,7 @@ public class BigQuerySyncTool extends HoodieSyncTool {
         
.setUseFileListingFromMetadata(config.getBoolean(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA))
         
.setAssumeDatePartitioning(config.getBoolean(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING))
         .build();
-    manifestFileWriter.writeManifestFile();
+    manifestFileWriter.writeManifestFile(false);
 
     if (!bqSyncClient.tableExists(manifestTableName)) {
       bqSyncClient.createManifestTable(manifestTableName, 
manifestFileWriter.getManifestSourceUri());
diff --git 
a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
 
b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
index 481d1ca781a..5203654d035 100644
--- 
a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
+++ 
b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java
@@ -59,9 +59,9 @@ public class ManifestFileWriter {
   /**
    * Write all the latest base file names to the manifest file.
    */
-  public synchronized void writeManifestFile() {
+  public synchronized void writeManifestFile(boolean useAbsolutePath) {
     try {
-      List<String> baseFiles = 
fetchLatestBaseFilesForAllPartitions(metaClient, useFileListingFromMetadata, 
assumeDatePartitioning)
+      List<String> baseFiles = 
fetchLatestBaseFilesForAllPartitions(metaClient, useFileListingFromMetadata, 
assumeDatePartitioning, useAbsolutePath)
           .collect(Collectors.toList());
       if (baseFiles.isEmpty()) {
         LOG.warn("No base file to generate manifest file.");
@@ -83,7 +83,7 @@ public class ManifestFileWriter {
   }
 
   public static Stream<String> 
fetchLatestBaseFilesForAllPartitions(HoodieTableMetaClient metaClient,
-      boolean useFileListingFromMetadata, boolean assumeDatePartitioning) {
+      boolean useFileListingFromMetadata, boolean assumeDatePartitioning, 
boolean useAbsolutePath) {
     try {
       List<String> partitions = FSUtils.getAllPartitionPaths(new 
HoodieLocalEngineContext(metaClient.getHadoopConf()),
           metaClient.getBasePath(), useFileListingFromMetadata, 
assumeDatePartitioning);
@@ -94,7 +94,7 @@ public class ManifestFileWriter {
         HoodieMetadataFileSystemView fsView = new 
HoodieMetadataFileSystemView(engContext, metaClient,
             
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(),
             
HoodieMetadataConfig.newBuilder().enable(useFileListingFromMetadata).withAssumeDatePartitioning(assumeDatePartitioning).build());
-        return fsView.getLatestBaseFiles(p).map(HoodieBaseFile::getFileName);
+        return fsView.getLatestBaseFiles(p).map(useAbsolutePath ? 
HoodieBaseFile::getPath : HoodieBaseFile::getFileName);
       });
     } catch (Exception e) {
       throw new HoodieException("Error in fetching latest base files.", e);
diff --git 
a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
 
b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
index 47b60f72325..c1dffc8e4de 100644
--- 
a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
+++ 
b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestManifestFileWriter.java
@@ -30,10 +30,13 @@ import org.junit.jupiter.api.Test;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.stream.IntStream;
+import java.util.List;
 
 import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS;
 import static 
org.apache.hudi.sync.common.util.ManifestFileWriter.fetchLatestBaseFilesForAllPartitions;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 
 public class TestManifestFileWriter extends HoodieCommonTestHarness {
 
@@ -47,7 +50,7 @@ public class TestManifestFileWriter extends 
HoodieCommonTestHarness {
     // Generate 10 files under each partition
     createTestDataForPartitionedTable(metaClient, 10);
     ManifestFileWriter manifestFileWriter = 
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
-    assertEquals(30, fetchLatestBaseFilesForAllPartitions(metaClient, false, 
false).count());
+    assertEquals(30, fetchLatestBaseFilesForAllPartitions(metaClient, false, 
false, false).count());
   }
 
   @Test
@@ -55,10 +58,26 @@ public class TestManifestFileWriter extends 
HoodieCommonTestHarness {
     // Generate 10 files under each partition
     createTestDataForPartitionedTable(metaClient, 3);
     ManifestFileWriter manifestFileWriter = 
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
-    manifestFileWriter.writeManifestFile();
+    manifestFileWriter.writeManifestFile(false);
     Path manifestFilePath = manifestFileWriter.getManifestFilePath();
     try (InputStream is = metaClient.getFs().open(manifestFilePath)) {
-      assertEquals(9, FileIOUtils.readAsUTFStringLines(is).size(), "there 
should be 9 base files in total; 3 per partition.");
+      List<String> expectedLines = FileIOUtils.readAsUTFStringLines(is);
+      assertEquals(9, expectedLines.size(), "there should be 9 base files in 
total; 3 per partition.");
+      expectedLines.forEach(line -> assertFalse(line.contains(basePath)));
+    }
+  }
+
+  @Test
+  public void testCreateManifestFileWithAbsolutePath() throws Exception {
+    // Generate 10 files under each partition
+    createTestDataForPartitionedTable(metaClient, 3);
+    ManifestFileWriter manifestFileWriter = 
ManifestFileWriter.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).build();
+    manifestFileWriter.writeManifestFile(true);
+    Path manifestFilePath = manifestFileWriter.getManifestFilePath();
+    try (InputStream is = metaClient.getFs().open(manifestFilePath)) {
+      List<String> expectedLines = FileIOUtils.readAsUTFStringLines(is);
+      assertEquals(9, expectedLines.size(), "there should be 9 base files in 
total; 3 per partition.");
+      expectedLines.forEach(line -> 
assertTrue(line.startsWith(metaClient.getFs().getScheme() + ":" + basePath)));
     }
   }
 

Reply via email to