This is an automated email from the ASF dual-hosted git repository. xushiyan pushed a commit to branch revert-6072-HUDI-4324-remove-use-jdbc in repository https://gitbox.apache.org/repos/asf/hudi.git
commit a64557126cfb05c57b34142e449569f25959a12d Author: Shiyan Xu <[email protected]> AuthorDate: Wed Jul 20 23:00:12 2022 -0700 Revert "[HUDI-4324] Remove use_jdbc config from hudi sync (#6072)" This reverts commit 046044c83d6382d455cfb0ff5c1130ddf926fcf3. --- conf/hudi-defaults.conf.template | 2 +- docker/demo/config/hoodie-incr.properties | 3 +- docker/demo/sparksql-incremental.commands | 2 -- .../util/TestDFSPropertiesConfiguration.java | 2 +- .../resources/external-config/hudi-defaults.conf | 2 +- .../apache/hudi/configuration/FlinkOptions.java | 6 ++++ .../apache/hudi/sink/utils/HiveSyncContext.java | 2 ++ .../apache/hudi/streamer/FlinkStreamerConfig.java | 4 +++ hudi-kafka-connect/demo/config-sink-hive.json | 1 + .../hudi/connect/writers/KafkaConnectConfigs.java | 1 + .../scala/org/apache/hudi/DataSourceOptions.scala | 10 +++++++ .../scala/org/apache/hudi/HoodieWriterUtils.scala | 1 + .../java/org/apache/hudi/hive/HiveSyncConfig.java | 5 ++++ .../org/apache/hudi/hive/HiveSyncConfigHolder.java | 11 +++++++- .../org/apache/hudi/hive/HoodieHiveSyncClient.java | 32 +++++++++++++--------- .../apache/hudi/hive/ddl/HiveQueryDDLExecutor.java | 2 +- .../org/apache/hudi/hive/ddl/JDBCExecutor.java | 2 +- .../replication/HiveSyncGlobalCommitParams.java | 2 -- .../org/apache/hudi/hive/TestHiveSyncTool.java | 2 +- .../hudi/utilities/HoodieDropPartitionsTool.java | 9 ++++-- 20 files changed, 72 insertions(+), 29 deletions(-) diff --git a/conf/hudi-defaults.conf.template b/conf/hudi-defaults.conf.template index fbcedb3f18..175dbaf23d 100644 --- a/conf/hudi-defaults.conf.template +++ b/conf/hudi-defaults.conf.template @@ -20,7 +20,7 @@ # Example: # hoodie.datasource.hive_sync.jdbcurl jdbc:hive2://localhost:10000 -# hoodie.datasource.hive_sync.mode jdbc +# hoodie.datasource.hive_sync.use_jdbc true # hoodie.datasource.hive_sync.support_timestamp false # hoodie.index.type BLOOM # hoodie.metadata.enable false diff --git a/docker/demo/config/hoodie-incr.properties b/docker/demo/config/hoodie-incr.properties index 47bfc95b53..80f474b1e7 100644 --- a/docker/demo/config/hoodie-incr.properties +++ b/docker/demo/config/hoodie-incr.properties @@ -28,6 +28,5 @@ hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true # hive sync hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2 -hoodie.datasource.hive_sync.mode=jdbc hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 -hoodie.datasource.hive_sync.partition_fields=partition +hoodie.datasource.hive_sync.partition_fields=partition \ No newline at end of file diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands index 3d7da63703..9ec586e49d 100644 --- a/docker/demo/sparksql-incremental.commands +++ b/docker/demo/sparksql-incremental.commands @@ -47,7 +47,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor"). option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor"). option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default"). - option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc"). option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000"). option(HiveSyncConfigHolder.HIVE_USER.key(), "hive"). option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive"). @@ -80,7 +79,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor_bs"). option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor_bs"). option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default"). - option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc"). option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000"). option(HiveSyncConfigHolder.HIVE_USER.key(), "hive"). option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive"). diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java index a122f414f9..465739340d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java @@ -185,7 +185,7 @@ public class TestDFSPropertiesConfiguration { DFSPropertiesConfiguration.refreshGlobalProps(); assertEquals(5, DFSPropertiesConfiguration.getGlobalProps().size()); assertEquals("jdbc:hive2://localhost:10000", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.jdbcurl")); - assertEquals("jdbc", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.mode")); + assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.use_jdbc")); assertEquals("false", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.support_timestamp")); assertEquals("BLOOM", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.index.type")); assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.metadata.enable")); diff --git a/hudi-common/src/test/resources/external-config/hudi-defaults.conf b/hudi-common/src/test/resources/external-config/hudi-defaults.conf index 2e4c3a5d75..1133adb4d7 100644 --- a/hudi-common/src/test/resources/external-config/hudi-defaults.conf +++ b/hudi-common/src/test/resources/external-config/hudi-defaults.conf @@ -20,7 +20,7 @@ # Example: hoodie.datasource.hive_sync.jdbcurl jdbc:hive2://localhost:10000 -hoodie.datasource.hive_sync.mode jdbc +hoodie.datasource.hive_sync.use_jdbc true hoodie.datasource.hive_sync.support_timestamp false hoodie.index.type BLOOM hoodie.metadata.enable true diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index 7425540de9..71b7976e14 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -755,6 +755,12 @@ public class FlinkOptions extends HoodieConfig { .defaultValue(false) .withDescription("Assume partitioning is yyyy/mm/dd, default false"); + public static final ConfigOption<Boolean> HIVE_SYNC_USE_JDBC = ConfigOptions + .key("hive_sync.use_jdbc") + .booleanType() + .defaultValue(true) + .withDescription("Use JDBC when hive synchronization is enabled, default true"); + public static final ConfigOption<Boolean> HIVE_SYNC_AUTO_CREATE_DB = ConfigOptions .key("hive_sync.auto_create_db") .booleanType() diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java index cceab5a615..e34adac580 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java @@ -43,6 +43,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT; import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION; @@ -104,6 +105,7 @@ public class HiveSyncContext { props.setPropertyIfNonNull(HIVE_TABLE_SERDE_PROPERTIES.key(), conf.getString(FlinkOptions.HIVE_SYNC_TABLE_SERDE_PROPERTIES)); props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(), String.join(",", FilePathUtils.extractHivePartitionFields(conf))); props.setPropertyIfNonNull(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME)); + props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC))); props.setPropertyIfNonNull(META_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(conf.getBoolean(FlinkOptions.METADATA_ENABLED))); props.setPropertyIfNonNull(HIVE_IGNORE_EXCEPTIONS.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS))); props.setPropertyIfNonNull(HIVE_SUPPORT_TIMESTAMP_TYPE.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_SUPPORT_TIMESTAMP))); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java index 1083754ca2..e9574dd52b 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java @@ -320,6 +320,9 @@ public class FlinkStreamerConfig extends Configuration { @Parameter(names = {"--hive-sync-assume-date-partitioning"}, description = "Assume partitioning is yyyy/mm/dd, default false") public Boolean hiveSyncAssumeDatePartition = false; + @Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization is enabled, default true") + public Boolean hiveSyncUseJdbc = true; + @Parameter(names = {"--hive-sync-auto-create-db"}, description = "Auto create hive database if it does not exists, default true") public Boolean hiveSyncAutoCreateDb = true; @@ -416,6 +419,7 @@ public class FlinkStreamerConfig extends Configuration { conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_FIELDS, config.hiveSyncPartitionFields); conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME, config.hiveSyncPartitionExtractorClass); conf.setBoolean(FlinkOptions.HIVE_SYNC_ASSUME_DATE_PARTITION, config.hiveSyncAssumeDatePartition); + conf.setBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC, config.hiveSyncUseJdbc); conf.setBoolean(FlinkOptions.HIVE_SYNC_AUTO_CREATE_DB, config.hiveSyncAutoCreateDb); conf.setBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS, config.hiveSyncIgnoreExceptions); conf.setBoolean(FlinkOptions.HIVE_SYNC_SKIP_RO_SUFFIX, config.hiveSyncSkipRoSuffix); diff --git a/hudi-kafka-connect/demo/config-sink-hive.json b/hudi-kafka-connect/demo/config-sink-hive.json index 7c49784cbf..214fd18919 100644 --- a/hudi-kafka-connect/demo/config-sink-hive.json +++ b/hudi-kafka-connect/demo/config-sink-hive.json @@ -21,6 +21,7 @@ "hoodie.datasource.hive_sync.table": "huditesttopic", "hoodie.datasource.hive_sync.partition_fields": "date", "hoodie.datasource.hive_sync.partition_extractor_class": "org.apache.hudi.hive.MultiPartKeysValueExtractor", + "hoodie.datasource.hive_sync.use_jdbc": "false", "hoodie.datasource.hive_sync.mode": "hms", "dfs.client.use.datanode.hostname": "true", "hive.metastore.uris": "thrift://hivemetastore:9083", diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java index 3b51fddfa8..e4543c692d 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java @@ -172,6 +172,7 @@ public class KafkaConnectConfigs extends HoodieConfig { public static final String HIVE_URL = "hoodie.datasource.hive_sync.jdbcurl"; public static final String HIVE_PARTITION_FIELDS = "hoodie.datasource.hive_sync.partition_fields"; public static final String HIVE_PARTITION_EXTRACTOR_CLASS = "hoodie.datasource.hive_sync.partition_extractor_class"; + public static final String HIVE_USE_JDBC = "hoodie.datasource.hive_sync.use_jdbc"; public static final String HIVE_SYNC_MODE = "hoodie.datasource.hive_sync.mode"; public static final String HIVE_AUTO_CREATE_DATABASE = "hoodie.datasource.hive_sync.auto_create_database"; public static final String HIVE_IGNORE_EXCEPTIONS = "hoodie.datasource.hive_sync.ignore_exceptions"; diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index feeb572126..6adc66265f 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -437,6 +437,10 @@ object DataSourceWriteOptions { val HIVE_ASSUME_DATE_PARTITION: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION @Deprecated val HIVE_USE_PRE_APACHE_INPUT_FORMAT: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT + + /** @deprecated Use {@link HIVE_SYNC_MODE} instead of this config from 0.9.0 */ + @Deprecated + val HIVE_USE_JDBC: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_JDBC @Deprecated val HIVE_AUTO_CREATE_DATABASE: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE @Deprecated @@ -496,6 +500,9 @@ object DataSourceWriteOptions { /** @deprecated Use {@link HIVE_USE_PRE_APACHE_INPUT_FORMAT} and its methods instead */ @Deprecated val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT.key() + /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */ + @Deprecated + val HIVE_USE_JDBC_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_JDBC.key() /** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */ @Deprecated val HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.key() @@ -686,6 +693,9 @@ object DataSourceWriteOptions { val DEFAULT_HIVE_ASSUME_DATE_PARTITION_OPT_VAL = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION.defaultValue() @Deprecated val DEFAULT_USE_PRE_APACHE_INPUT_FORMAT_OPT_VAL = "false" + /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */ + @Deprecated + val DEFAULT_HIVE_USE_JDBC_OPT_VAL = HiveSyncConfigHolder.HIVE_USE_JDBC.defaultValue() /** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */ @Deprecated val DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.defaultValue() diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index f6473c2b89..f9d8a60004 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -78,6 +78,7 @@ object HoodieWriterUtils { hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS) hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS) hoodieConfig.setDefaultValue(HIVE_STYLE_PARTITIONING) + hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_USE_JDBC) hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE) hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_SYNC_AS_DATA_SOURCE_TABLE) hoodieConfig.setDefaultValue(ASYNC_COMPACT_ENABLE) diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index cdb192f9fe..6f2cc50a0a 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -45,6 +45,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT; import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS; @@ -94,6 +95,9 @@ public class HiveSyncConfig extends HoodieSyncConfig { + "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to " + "org.apache.hudi input format.") public Boolean usePreApacheInputFormat; + @Deprecated + @Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url") + public Boolean useJdbc; @Parameter(names = {"--metastore-uris"}, description = "Hive metastore uris") public String metastoreUris; @Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms,glue,jdbc and hiveql") @@ -138,6 +142,7 @@ public class HiveSyncConfig extends HoodieSyncConfig { props.setPropertyIfNonNull(HIVE_PASS.key(), hivePass); props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl); props.setPropertyIfNonNull(HIVE_USE_PRE_APACHE_INPUT_FORMAT.key(), usePreApacheInputFormat); + props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), useJdbc); props.setPropertyIfNonNull(HIVE_SYNC_MODE.key(), syncMode); props.setPropertyIfNonNull(METASTORE_URIS.key(), metastoreUris); props.setPropertyIfNonNull(HIVE_AUTO_CREATE_DATABASE.key(), autoCreateDatabase); diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java index 8c14cdfe29..3877782c92 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java @@ -52,6 +52,15 @@ public class HiveSyncConfigHolder { .withDocumentation("Flag to choose InputFormat under com.uber.hoodie package instead of org.apache.hudi package. " + "Use this when you are in the process of migrating from " + "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format"); + /** + * @deprecated Use {@link #HIVE_SYNC_MODE} instead of this config from 0.9.0 + */ + @Deprecated + public static final ConfigProperty<String> HIVE_USE_JDBC = ConfigProperty + .key("hoodie.datasource.hive_sync.use_jdbc") + .defaultValue("true") + .deprecatedAfter("0.9.0") + .withDocumentation("Use JDBC when hive synchronization is enabled"); public static final ConfigProperty<String> METASTORE_URIS = ConfigProperty .key("hoodie.datasource.hive_sync.metastore.uris") .defaultValue("thrift://localhost:9083") @@ -100,7 +109,7 @@ public class HiveSyncConfigHolder { .withDocumentation("The number of partitions one batch when synchronous partitions to hive."); public static final ConfigProperty<String> HIVE_SYNC_MODE = ConfigProperty .key("hoodie.datasource.hive_sync.mode") - .defaultValue("jdbc") + .noDefaultValue() .withDocumentation("Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql."); public static final ConfigProperty<Boolean> HIVE_SYNC_BUCKET_SYNC = ConfigProperty .key("hoodie.datasource.hive_sync.bucket_sync") diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java index 26ba4ae8e1..d5a85adcba 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java @@ -21,6 +21,7 @@ package org.apache.hudi.hive; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.hive.ddl.DDLExecutor; import org.apache.hudi.hive.ddl.HMSDDLExecutor; @@ -48,6 +49,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; import static org.apache.hudi.sync.common.util.TableUtils.tableId; @@ -70,19 +72,23 @@ public class HoodieHiveSyncClient extends HoodieSyncClient { // Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should // disable jdbc and depend on metastore client for all hive registrations try { - HiveSyncMode syncMode = HiveSyncMode.of(config.getStringOrDefault(HIVE_SYNC_MODE)); - switch (syncMode) { - case HMS: - ddlExecutor = new HMSDDLExecutor(config); - break; - case HIVEQL: - ddlExecutor = new HiveQueryDDLExecutor(config); - break; - case JDBC: - ddlExecutor = new JDBCExecutor(config); - break; - default: - throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE)); + if (!StringUtils.isNullOrEmpty(config.getString(HIVE_SYNC_MODE))) { + HiveSyncMode syncMode = HiveSyncMode.of(config.getString(HIVE_SYNC_MODE)); + switch (syncMode) { + case HMS: + ddlExecutor = new HMSDDLExecutor(config); + break; + case HIVEQL: + ddlExecutor = new HiveQueryDDLExecutor(config); + break; + case JDBC: + ddlExecutor = new JDBCExecutor(config); + break; + default: + throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE)); + } + } else { + ddlExecutor = config.getBoolean(HIVE_USE_JDBC) ? new JDBCExecutor(config) : new HiveQueryDDLExecutor(config); } this.client = Hive.get(config.getHiveConf()).getMSC(); } catch (Exception e) { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java index 619a417c33..90efd2701c 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java @@ -47,7 +47,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.sync.common.util.TableUtils.tableId; /** - * This class offers DDL executor backed by the HiveQL Driver. + * This class offers DDL executor backed by the hive.ql Driver This class preserves the old useJDBC = false way of doing things. */ public class HiveQueryDDLExecutor extends QueryBasedDDLExecutor { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java index 1fc8f082d8..2673e46a9f 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java @@ -43,7 +43,7 @@ import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER; import static org.apache.hudi.hive.util.HiveSchemaUtil.HIVE_ESCAPE_CHARACTER; /** - * This class offers DDL executor backed by the jdbc. + * This class offers DDL executor backed by the jdbc This class preserves the old useJDBC = true way of doing things. */ public class JDBCExecutor extends QueryBasedDDLExecutor { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java index b8b2de73e7..58188f578e 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java @@ -32,7 +32,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.Properties; -import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; @@ -93,7 +92,6 @@ public class HiveSyncGlobalCommitParams { String jdbcUrl = forRemote ? loadedProps.getProperty(REMOTE_HIVE_SERVER_JDBC_URLS) : loadedProps.getProperty(LOCAL_HIVE_SERVER_JDBC_URLS, loadedProps.getProperty(HIVE_URL.key())); props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl); - props.setProperty(HIVE_SYNC_MODE.key(), "jdbc"); LOG.info("building hivesync config forRemote: " + forRemote + " " + jdbcUrl + " " + basePath); return props; diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java index 7abeed480c..072feeb663 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java @@ -120,7 +120,7 @@ public class TestHiveSyncTool { return opts; } - // (useSchemaFromCommitMetadata, syncAsDataSource, syncMode) + // (useJdbc, useSchemaFromCommitMetadata, syncAsDataSource) private static Iterable<Object[]> syncDataSourceTableParams() { List<Object[]> opts = new ArrayList<>(); for (Object mode : SYNC_MODES) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java index fc16bcaa1b..95e84e413c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java @@ -174,6 +174,8 @@ public class HoodieDropPartitionsTool implements Serializable { public String hiveURL = "jdbc:hive2://localhost:10000"; @Parameter(names = {"--hive-partition-field"}, description = "Comma separated list of field in the hive table to use for determining hive partition columns.", required = false) public String hivePartitionsField = ""; + @Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization.", required = false) + public boolean hiveUseJdbc = true; @Parameter(names = {"--hive-metastore-uris"}, description = "hive meta store uris to use.", required = false) public String hiveHMSUris = null; @Parameter(names = {"--hive-sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.", required = false) @@ -213,7 +215,7 @@ public class HoodieDropPartitionsTool implements Serializable { + " --hive-pass-word " + "Masked" + ", \n" + " --hive-jdbc-url " + hiveURL + ", \n" + " --hive-partition-field " + hivePartitionsField + ", \n" - + " --hive-sync-mode " + hiveSyncMode + ", \n" + + " --hive-sync-use-jdbc " + hiveUseJdbc + ", \n" + " --hive-metastore-uris " + hiveHMSUris + ", \n" + " --hive-sync-ignore-exception " + hiveSyncIgnoreException + ", \n" + " --hive-partition-value-extractor-class " + partitionValueExtractorClass + ", \n" @@ -245,7 +247,7 @@ public class HoodieDropPartitionsTool implements Serializable { && Objects.equals(hivePassWord, config.hivePassWord) && Objects.equals(hiveURL, config.hiveURL) && Objects.equals(hivePartitionsField, config.hivePartitionsField) - && Objects.equals(hiveSyncMode, config.hiveSyncMode) + && Objects.equals(hiveUseJdbc, config.hiveUseJdbc) && Objects.equals(hiveHMSUris, config.hiveHMSUris) && Objects.equals(partitionValueExtractorClass, config.partitionValueExtractorClass) && Objects.equals(sparkMaster, config.sparkMaster) @@ -259,7 +261,7 @@ public class HoodieDropPartitionsTool implements Serializable { public int hashCode() { return Objects.hash(basePath, runningMode, tableName, partitions, instantTime, syncToHive, hiveDataBase, hiveTableName, hiveUserName, hivePassWord, hiveURL, - hivePartitionsField, hiveSyncMode, hiveHMSUris, partitionValueExtractorClass, + hivePartitionsField, hiveUseJdbc, hiveHMSUris, partitionValueExtractorClass, sparkMaster, sparkMemory, propsFilePath, configs, hiveSyncIgnoreException, help); } } @@ -348,6 +350,7 @@ public class HoodieDropPartitionsTool implements Serializable { props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord); props.put(DataSourceWriteOptions.HIVE_URL().key(), cfg.hiveURL); props.put(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), cfg.hivePartitionsField); + props.put(DataSourceWriteOptions.HIVE_USE_JDBC().key(), cfg.hiveUseJdbc); props.put(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), cfg.hiveSyncMode); props.put(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().key(), cfg.hiveSyncIgnoreException); props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord);
