This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new fb2361f78dc [HUDI-7881] Compare bigquery table base path via
source-uris if hive partitioning options are not available (#11679)
fb2361f78dc is described below
commit fb2361f78dc89a89b2922e7d36fdce19a2c140ad
Author: Praveen Gajulapalli <[email protected]>
AuthorDate: Thu Aug 8 11:26:01 2024 +0530
[HUDI-7881] Compare bigquery table base path via source-uris if hive
partitioning options are not available (#11679)
Co-authored-by: Y Ethan Guo <[email protected]>
---
.../gcp/bigquery/HoodieBigQuerySyncClient.java | 33 ++++++++++++++++------
.../gcp/bigquery/TestHoodieBigQuerySyncClient.java | 5 ++++
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git
a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
index 5143e0af28b..f46c63f1e2b 100644
---
a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
+++
b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java
@@ -19,6 +19,7 @@
package org.apache.hudi.gcp.bigquery;
+import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.hudi.sync.common.HoodieSyncClient;
@@ -297,6 +298,7 @@ public class HoodieBigQuerySyncClient extends
HoodieSyncClient {
/**
* Checks for the existence of a table that uses the manifest file approach
and matches other requirements.
+ *
* @param tableName name of the table
* @return Returns true if the table does not exist or if the table does
exist but does not use the manifest file or table base path is outdated. False
otherwise.
*/
@@ -310,15 +312,8 @@ public class HoodieBigQuerySyncClient extends
HoodieSyncClient {
boolean manifestDoesNotExist =
externalTableDefinition.getSourceUris() == null
|| externalTableDefinition.getSourceUris().stream().noneMatch(uri
-> uri.contains(ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
- String basePathInTableDefinition =
externalTableDefinition.getHivePartitioningOptions() == null ? "" :
-
externalTableDefinition.getHivePartitioningOptions().getSourceUriPrefix();
- // remove trailing slash
- basePathInTableDefinition =
StringUtils.stripEnd(basePathInTableDefinition, "/");
- String basePath = getBasePath();
- basePath = StringUtils.stripEnd(basePath, "/");
- if (!basePathInTableDefinition.equals(basePath)) {
+ if (isBasePathUpdated(externalTableDefinition)) {
// if table base path is outdated, we need to replace the table.
- LOG.warn("Base path in table definition: {}, new base path: {}",
basePathInTableDefinition, basePath);
return true;
}
if
(!StringUtils.isNullOrEmpty(config.getString(BIGQUERY_SYNC_BIG_LAKE_CONNECTION_ID)))
{
@@ -328,6 +323,28 @@ public class HoodieBigQuerySyncClient extends
HoodieSyncClient {
return manifestDoesNotExist;
}
+  /**
+   * Checks whether the base path recorded in the existing external table differs from the base
+   * path currently being synced.
+   *
+   * <p>If the table definition carries hive partitioning options, their source URI prefix is
+   * compared with the current base path, ignoring trailing slashes. Otherwise the table is treated
+   * as matching only if at least one of its source URIs starts with the current base path followed
+   * by a slash.
+   *
+   * @param externalTableDefinition definition of the existing external table
+   * @return true if the table definition does not match the current base path, false otherwise
+   */
+  private boolean isBasePathUpdated(ExternalTableDefinition externalTableDefinition) {
+    String basePath = StringUtils.stripEnd(getBasePath(), "/");
+    if (externalTableDefinition.getHivePartitioningOptions() == null) {
+      List<String> sourceUris =
+          Option.ofNullable(externalTableDefinition.getSourceUris()).orElse(Collections.emptyList());
+      // compare source uris with trailing slash to make sure the unwanted prefix matches are avoided
+      String basePathWithTrailingSlash = String.format("%s/", basePath);
+      boolean isTableBasePathUpdated = sourceUris.stream()
+          .noneMatch(sourceUri -> sourceUri.startsWith(basePathWithTrailingSlash));
+      if (isTableBasePathUpdated) {
+        LOG.warn("Base path in table source uris: {}, new base path: {}", sourceUris, basePathWithTrailingSlash);
+      }
+      return isTableBasePathUpdated;
+    }
+    String sourceUriPrefix = externalTableDefinition.getHivePartitioningOptions().getSourceUriPrefix();
+    // A missing source uri prefix can never match the current base path; handle it explicitly
+    // instead of dereferencing null below.
+    String basePathInTableDefinition =
+        sourceUriPrefix == null ? null : StringUtils.stripEnd(sourceUriPrefix, "/");
+    boolean isTableBasePathUpdated =
+        basePathInTableDefinition == null || !basePathInTableDefinition.equals(basePath);
+    if (isTableBasePathUpdated) {
+      LOG.warn("Base path in table definition: {}, new base path: {}", basePathInTableDefinition, basePath);
+    }
+    return isTableBasePathUpdated;
+  }
+
@Override
public void close() {
bigquery = null;
diff --git
a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
index 6394f36225e..e004edf9f1f 100644
---
a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
+++
b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java
@@ -186,6 +186,11 @@ public class TestHoodieBigQuerySyncClient {
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.emptyList());
assertTrue(client.tableNotExistsOrDoesNotMatchSpecification(TEST_TABLE));
+ // manifest exists and source uris are under the base path, so the table matches the specification
+
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.singletonList(
+ basePath + "/.hoodie/" +
ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
+ assertFalse(client.tableNotExistsOrDoesNotMatchSpecification(TEST_TABLE));
+
// manifest exists but base path is outdated
when(externalTableDefinition.getSourceUris()).thenReturn(Collections.singletonList(ManifestFileWriter.ABSOLUTE_PATH_MANIFEST_FOLDER_NAME));
when(externalTableDefinition.getHivePartitioningOptions()).thenReturn(