This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new fb2361f78dc [HUDI-7881] Compare bigquery table base path via source-uris if hive partitioning options are not available (#11679)
fb2361f78dc is described below

commit fb2361f78dc89a89b2922e7d36fdce19a2c140ad
Author: Praveen Gajulapalli <[email protected]>
AuthorDate: Thu Aug 8 11:26:01 2024 +0530

    [HUDI-7881] Compare bigquery table base path via source-uris if hive partitioning options are not available (#11679)
    
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 .../gcp/bigquery/HoodieBigQuerySyncClient.java     | 33 ++++++++++++++++------
 .../gcp/bigquery/TestHoodieBigQuerySyncClient.java |  5 ++++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git 
a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
 
b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
index 5143e0af28b..f46c63f1e2b 100644
--- 
a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
+++ 
b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
@@ -19,6 +19,7 @@
 
 package org.apache.hudi.gcp.bigquery;
 
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.VisibleForTesting;
 import org.apache.hudi.sync.common.HoodieSyncClient;
@@ -297,6 +298,7 @@ public class HoodieBigQuerySyncClient extends 
HoodieSyncClient {
 
   /**
    * Checks for the existence of a table that uses the manifest file approach 
and matches other requirements.
+   *
    * @param tableName name of the table
    * @return Returns true if the table does not exist or if the table does 
exist but does not use the manifest file or table base path is outdated. False 
otherwise.
    */
@@ -310,15 +312,8 @@ public class HoodieBigQuerySyncClient extends 
HoodieSyncClient {
     boolean manifestDoesNotExist =
         externalTableDefinition.getSourceUris() == null
             || externalTableDefinition.getSourceUris().stream().noneMatch(uri 
-> uri.contains(ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
-    String basePathInTableDefinition = 
externalTableDefinition.getHivePartitioningOptions() == null ? "" :
-        
externalTableDefinition.getHivePartitioningOptions().getSourceUriPrefix();
-    // remove trailing slash
-    basePathInTableDefinition = 
StringUtils.stripEnd(basePathInTableDefinition, "/");
-    String basePath = getBasePath();
-    basePath = StringUtils.stripEnd(basePath, "/");
-    if (!basePathInTableDefinition.equals(basePath)) {
+    if (isBasePathUpdated(externalTableDefinition)) {
       // if table base path is outdated, we need to replace the table.
-      LOG.warn("Base path in table definition: {}, new base path: {}", 
basePathInTableDefinition, basePath);
       return true;
     }
     if 
(!StringUtils.isNullOrEmpty(config.getString(BIGQUERY_SYNC_BIG_LAKE_CONNECTION_ID)))
 {
@@ -328,6 +323,28 @@ public class HoodieBigQuerySyncClient extends 
HoodieSyncClient {
     return manifestDoesNotExist;
   }
 
+  private boolean isBasePathUpdated(ExternalTableDefinition 
externalTableDefinition) {
+    String basePath = StringUtils.stripEnd(getBasePath(), "/");
+    if (externalTableDefinition.getHivePartitioningOptions() == null) {
+      List<String> sourceUris = 
Option.ofNullable(externalTableDefinition.getSourceUris()).orElse(Collections.emptyList());
+      // compare source uris with trailing slash to make sure the unwanted 
prefix matches are avoided
+      String basePathWithTrailingSlash = String.format("%s/", basePath);
+      boolean isTableBasePathUpdated = sourceUris.stream()
+          .noneMatch(sourceUri -> 
sourceUri.startsWith(basePathWithTrailingSlash));
+      if (isTableBasePathUpdated) {
+        LOG.warn("Base path in table source uris: {}, new base path: {}", 
sourceUris, basePathWithTrailingSlash);
+      }
+      return isTableBasePathUpdated;
+    }
+    String basePathInTableDefinition = 
externalTableDefinition.getHivePartitioningOptions().getSourceUriPrefix();
+    basePathInTableDefinition = 
StringUtils.stripEnd(basePathInTableDefinition, "/");
+    boolean isTableBasePathUpdated = 
!basePathInTableDefinition.equals(basePath);
+    if (isTableBasePathUpdated) {
+      LOG.warn("Base path in table definition: {}, new base path: {}", 
basePathInTableDefinition, basePath);
+    }
+    return isTableBasePathUpdated;
+  }
+
   @Override
   public void close() {
     bigquery = null;
diff --git 
a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
 
b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
index 6394f36225e..e004edf9f1f 100644
--- 
a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
+++ 
b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
@@ -186,6 +186,11 @@ public class TestHoodieBigQuerySyncClient {
     
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.emptyList());
     assertTrue(client.tableNotExistsOrDoesNotMatchSpecification(TEST_TABLE));
 
+    // manifest exists and source uris are under the base path, so the table matches
+    
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.singletonList(
+        basePath + "/.hoodie/" + 
ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
+    assertFalse(client.tableNotExistsOrDoesNotMatchSpecification(TEST_TABLE));
+
     // manifest exists but base path is outdated
     
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.singletonList(ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
     when(externalTableDefinition.getHivePartitioningOptions()).thenReturn(

Reply via email to