This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new abe26d4169c [HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
abe26d4169c is described below
commit abe26d4169c04da05b99941161621876e3569e96
Author: Shiyan Xu <[email protected]>
AuthorDate: Thu Feb 2 00:39:28 2023 -0600
[HUDI-5676] Fix BigQuerySyncTool standalone mode (#7816)
---
.../hudi/gcp/bigquery/BigQuerySyncConfig.java | 38 ++++--------
.../gcp/bigquery/TestBigQuerySyncToolArgs.java | 70 ++++++++++++++++++++++
packaging/hudi-gcp-bundle/pom.xml | 8 ++-
3 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
index b46cd9a9f81..52b3d3b74e5 100644
--- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
+++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncConfig.java
@@ -20,14 +20,13 @@
package org.apache.hudi.gcp.bigquery;
import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.sync.common.HoodieSyncConfig;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParametersDelegate;
import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Properties;
/**
@@ -101,38 +100,27 @@ public class BigQuerySyncConfig extends HoodieSyncConfig implements Serializable
public String datasetName;
@Parameter(names = {"--dataset-location"}, description = "Location of the
target dataset in BigQuery", required = true)
public String datasetLocation;
- @Parameter(names = {"--table-name"}, description = "Name of the target
table in BigQuery", required = true)
- public String tableName;
@Parameter(names = {"--source-uri"}, description = "Name of the source uri
gcs path of the table", required = true)
public String sourceUri;
@Parameter(names = {"--source-uri-prefix"}, description = "Name of the
source uri gcs path prefix of the table", required = true)
public String sourceUriPrefix;
- @Parameter(names = {"--base-path"}, description = "Base path of the hoodie
table to sync", required = true)
- public String basePath;
- @Parameter(names = {"--partitioned-by"}, description = "Comma-delimited
partition fields. Default to non-partitioned.")
- public List<String> partitionFields = new ArrayList<>();
- @Parameter(names = {"--use-file-listing-from-metadata"}, description =
"Fetch file listing from Hudi's metadata")
- public boolean useFileListingFromMetadata = false;
- @Parameter(names = {"--assume-date-partitioning"}, description = "Assume
standard yyyy/mm/dd partitioning, this"
- + " exists to support backward compatibility. If you use hoodie 0.3.x,
do not set this parameter")
- public boolean assumeDatePartitioning = false;
public boolean isHelp() {
return hoodieSyncConfigParams.isHelp();
}
- public Properties toProps() {
- final Properties props = hoodieSyncConfigParams.toProps();
- props.setProperty(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
- props.setProperty(BIGQUERY_SYNC_DATASET_NAME.key(), datasetName);
- props.setProperty(BIGQUERY_SYNC_DATASET_LOCATION.key(), datasetLocation);
- props.setProperty(BIGQUERY_SYNC_TABLE_NAME.key(), tableName);
- props.setProperty(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
- props.setProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(),
sourceUriPrefix);
- props.setProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key(), basePath);
- props.setProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key(), String.join(",",
partitionFields));
- props.setProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(),
String.valueOf(useFileListingFromMetadata));
- props.setProperty(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(),
String.valueOf(assumeDatePartitioning));
+ public TypedProperties toProps() {
+ final TypedProperties props = hoodieSyncConfigParams.toProps();
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_PROJECT_ID.key(), projectId);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_NAME.key(),
datasetName);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_DATASET_LOCATION.key(),
datasetLocation);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_TABLE_NAME.key(),
hoodieSyncConfigParams.tableName);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI.key(), sourceUri);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key(),
sourceUriPrefix);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_SYNC_BASE_PATH.key(),
hoodieSyncConfigParams.basePath);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_PARTITION_FIELDS.key(),
String.join(",", hoodieSyncConfigParams.partitionFields));
+
props.setPropertyIfNonNull(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key(),
hoodieSyncConfigParams.useFileListingFromMetadata);
+ props.setPropertyIfNonNull(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key(),
hoodieSyncConfigParams.assumeDatePartitioning);
return props;
}
}
diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
new file mode 100644
index 00000000000..898358484d9
--- /dev/null
+++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestBigQuerySyncToolArgs.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.gcp.bigquery;
+
+import com.beust.jcommander.JCommander;
+import org.junit.jupiter.api.Test;
+
+import java.util.Properties;
+
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_LOCATION;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_DATASET_NAME;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PARTITION_FIELDS;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_PROJECT_ID;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SOURCE_URI_PREFIX;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_SYNC_BASE_PATH;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_TABLE_NAME;
+import static
org.apache.hudi.gcp.bigquery.BigQuerySyncConfig.BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class TestBigQuerySyncToolArgs {
+
+ @Test
+ public void testArgsParse() {
+ BigQuerySyncConfig.BigQuerySyncConfigParams params = new
BigQuerySyncConfig.BigQuerySyncConfigParams();
+ JCommander cmd = JCommander.newBuilder().addObject(params).build();
+ String[] args = {
+ "--project-id", "hudi-bq",
+ "--dataset-name", "foobar",
+ "--dataset-location", "us-west1",
+ "--table", "foobartable",
+ "--source-uri", "gs://foobartable/year=*",
+ "--source-uri-prefix", "gs://foobartable/",
+ "--base-path", "gs://foobartable",
+ "--partitioned-by", "year,month,day",
+ "--use-file-listing-from-metadata"
+ };
+ cmd.parse(args);
+
+ final Properties props = params.toProps();
+ assertEquals("hudi-bq", props.getProperty(BIGQUERY_SYNC_PROJECT_ID.key()));
+ assertEquals("foobar",
props.getProperty(BIGQUERY_SYNC_DATASET_NAME.key()));
+ assertEquals("us-west1",
props.getProperty(BIGQUERY_SYNC_DATASET_LOCATION.key()));
+ assertEquals("foobartable",
props.getProperty(BIGQUERY_SYNC_TABLE_NAME.key()));
+ assertEquals("gs://foobartable/year=*",
props.getProperty(BIGQUERY_SYNC_SOURCE_URI.key()));
+ assertEquals("gs://foobartable/",
props.getProperty(BIGQUERY_SYNC_SOURCE_URI_PREFIX.key()));
+ assertEquals("gs://foobartable",
props.getProperty(BIGQUERY_SYNC_SYNC_BASE_PATH.key()));
+ assertEquals("year,month,day",
props.getProperty(BIGQUERY_SYNC_PARTITION_FIELDS.key()));
+ assertEquals("true",
props.getProperty(BIGQUERY_SYNC_USE_FILE_LISTING_FROM_METADATA.key()));
+
assertFalse(props.containsKey(BIGQUERY_SYNC_ASSUME_DATE_PARTITIONING.key()));
+ }
+}
diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml
index 85d3093e89a..fe9b6b55527 100644
--- a/packaging/hudi-gcp-bundle/pom.xml
+++ b/packaging/hudi-gcp-bundle/pom.xml
@@ -95,9 +95,9 @@
<include>org.apache.hudi:hudi-common</include>
<include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.hudi:hudi-sync-common</include>
+ <include>org.apache.hudi:hudi-hive-sync</include>
<include>org.apache.hudi:hudi-gcp</include>
<include>org.apache.parquet:parquet-avro</include>
-
<include>com.google.cloud:google-cloud-bigquery</include>
<include>com.beust:jcommander</include>
<include>commons-io:commons-io</include>
@@ -164,6 +164,12 @@
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-hive-sync</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-gcp</artifactId>