This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-0.13.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit c6c38f9c115e25e4ef4578356f4501235ee4820c Author: Y Ethan Guo <[email protected]> AuthorDate: Sat Feb 4 17:36:53 2023 -0800 [HUDI-5703] Improve `ConfigProperty` APIs for docs generation (#7854) This commit makes a few improvements around the `ConfigProperty` for auto-generating configuration docs on our website. --- .../hudi/config/DynamoDbBasedLockConfig.java | 9 +++-- .../apache/hudi/config/HoodieHBaseIndexConfig.java | 1 + .../org/apache/hudi/config/HoodieIndexConfig.java | 7 ++-- .../org/apache/hudi/config/HoodieLockConfig.java | 8 ++-- .../config/HoodieWriteCommitCallbackConfig.java | 5 ++- .../hudi/common/config/ConfigClassProperty.java | 6 ++- .../apache/hudi/common/config/ConfigGroups.java | 41 ++++++++++++++++++- .../apache/hudi/common/config/ConfigProperty.java | 47 ++++++++++++++++------ .../apache/hudi/common/config/HoodieConfig.java | 17 ++++++-- .../hudi/common/table/HoodieTableConfig.java | 10 ----- .../apache/hudi/sync/common/HoodieSyncConfig.java | 5 ++- .../HoodieWriteCommitKafkaCallbackConfig.java | 1 + .../HoodieWriteCommitPulsarCallbackConfig.java | 1 + 13 files changed, 115 insertions(+), 43 deletions(-) diff --git a/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java b/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java index 1894b8641c1..89f2515c992 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java +++ b/hudi-aws/src/main/java/org/apache/hudi/config/DynamoDbBasedLockConfig.java @@ -34,10 +34,11 @@ import static org.apache.hudi.common.config.LockConfiguration.LOCK_PREFIX; * Hoodie Configs for Locks. */ @ConfigClassProperty(name = "DynamoDB based Locks Configurations", - groupName = ConfigGroups.Names.WRITE_CLIENT, - description = "Configs that control DynamoDB based locking mechanisms required for concurrency control " - + " between writers to a Hudi table. Concurrency between Hudi's own table services " - + " are auto managed internally.") + groupName = ConfigGroups.Names.WRITE_CLIENT, + subGroupName = ConfigGroups.SubGroupNames.LOCK, + description = "Configs that control DynamoDB based locking mechanisms required for concurrency control " + + " between writers to a Hudi table. Concurrency between Hudi's own table services " + + " are auto managed internally.") public class DynamoDbBasedLockConfig extends HoodieConfig { // configs for DynamoDb based locks diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java index d52c407028a..359b9ab6a52 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieHBaseIndexConfig.java @@ -31,6 +31,7 @@ import java.util.Properties; @ConfigClassProperty(name = "HBase Index Configs", groupName = ConfigGroups.Names.WRITE_CLIENT, + subGroupName = ConfigGroups.SubGroupNames.INDEX, description = "Configurations that control indexing behavior " + "(when HBase based indexing is enabled), which tags incoming " + "records as either inserts or updates to older records.") diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java index 87e9ae576db..207c30509d8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java @@ -59,10 +59,11 @@ import static org.apache.hudi.index.HoodieIndex.IndexType.SIMPLE; * Indexing related config. */ @Immutable -@ConfigClassProperty(name = "Index Configs", +@ConfigClassProperty(name = "Common Index Configs", groupName = ConfigGroups.Names.WRITE_CLIENT, - description = "Configurations that control indexing behavior, " - + "which tags incoming records as either inserts or updates to older records.") + subGroupName = ConfigGroups.SubGroupNames.INDEX, + areCommonConfigs = true, + description = "") public class HoodieIndexConfig extends HoodieConfig { private static final Logger LOG = LogManager.getLogger(HoodieIndexConfig.class); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java index 3c932756685..62aa6694275 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java @@ -60,11 +60,11 @@ import static org.apache.hudi.common.config.LockConfiguration.ZK_SESSION_TIMEOUT /** * Hoodie Configs for Locks. */ -@ConfigClassProperty(name = "Locks Configurations", +@ConfigClassProperty(name = "Common Lock Configurations", groupName = ConfigGroups.Names.WRITE_CLIENT, - description = "Configs that control locking mechanisms required for concurrency control " - + " between writers to a Hudi table. Concurrency between Hudi's own table services " - + " are auto managed internally.") + subGroupName = ConfigGroups.SubGroupNames.LOCK, + areCommonConfigs = true, + description = "") public class HoodieLockConfig extends HoodieConfig { public static final ConfigProperty<String> LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS = ConfigProperty diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteCommitCallbackConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteCommitCallbackConfig.java index 09c2b09be03..8be57c05d1c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteCommitCallbackConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteCommitCallbackConfig.java @@ -32,8 +32,9 @@ import java.util.Properties; */ @ConfigClassProperty(name = "Write commit callback configs", groupName = ConfigGroups.Names.WRITE_CLIENT, - description = "Controls callback behavior into HTTP endpoints, to push " - + " notifications on commits on hudi tables.") + subGroupName = ConfigGroups.SubGroupNames.COMMIT_CALLBACK, + areCommonConfigs = true, + description = "") public class HoodieWriteCommitCallbackConfig extends HoodieConfig { public static final String CALLBACK_PREFIX = "hoodie.write.commit.callback."; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigClassProperty.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigClassProperty.java index f0a341eb824..9d746365b69 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigClassProperty.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigClassProperty.java @@ -26,7 +26,7 @@ import java.lang.annotation.Target; /** * Annotation for superclasses of {@link HoodieConfig} that includes the * human-readable name of the config class, the config group ({@link ConfigGroups}) - * it belongs to (e.g., spark/ flink/ write) + * it belongs to (e.g., spark/ flink/ write), optional sub-group ({@link ConfigGroups}), * and the description of the config class. */ @Retention(RetentionPolicy.RUNTIME) @@ -36,5 +36,9 @@ public @interface ConfigClassProperty { ConfigGroups.Names groupName(); + ConfigGroups.SubGroupNames subGroupName() default ConfigGroups.SubGroupNames.NONE; + + boolean areCommonConfigs() default false; + String description(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java index 6fe8e5002a8..9bd61cba7b8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java @@ -45,11 +45,42 @@ public class ConfigGroups { } } + public enum SubGroupNames { + INDEX( + "Index Configs", + "Configurations that control indexing behavior, " + + "which tags incoming records as either inserts or updates to older records."), + LOCK( + "Lock Configs", + "Configurations that control locking mechanisms required for concurrency control " + + " between writers to a Hudi table. Concurrency between Hudi's own table services " + + " are auto managed internally."), + COMMIT_CALLBACK( + "Commit Callback Configs", + "Configurations controling callback behavior into HTTP endpoints, to push " + + "notifications on commits on hudi tables."), + NONE( + "None", + "No subgroup. This description should be hidden."); + + public final String name; + private final String description; + + SubGroupNames(String name, String description) { + this.name = name; + this.description = description; + } + + public String getDescription() { + return description; + } + } + public static String getDescription(Names names) { String description; switch (names) { case SPARK_DATASOURCE: - description = "These configs control the Hudi Spark Datasource, " + description = "These configs control the Hudi Spark Datasource, " + "providing ability to define keys/partitioning, pick out the write operation, " + "specify how to merge records or choosing query type to read."; break; @@ -66,8 +97,11 @@ public class ConfigGroups { + "write schema, cleaning etc. Although Hudi provides sane defaults, from time-time " + "these configs may need to be tweaked to optimize for specific workloads."; break; + case META_SYNC: + description = "Configurations used by the Hudi to sync metadata to external metastores and catalogs."; + break; case RECORD_PAYLOAD: - description = "This is the lowest level of customization offered by Hudi. " + description = "This is the lowest level of customization offered by Hudi. " + "Record payloads define how to produce new values to upsert based on incoming " + "new record and stored old record. Hudi provides default implementations such as " + "OverwriteWithLatestAvroPayload which simply update table with the latest/last-written record. " @@ -81,6 +115,9 @@ public class ConfigGroups { case KAFKA_CONNECT: description = "These set of configs are used for Kafka Connect Sink Connector for writing Hudi Tables"; break; + case AWS: + description = "Configurations specific to Amazon Web Services."; + break; default: description = "Please fill in the description for Config Group Name: " + names.name; break; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java index 08f36512c91..364fc4203a8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java @@ -34,7 +34,7 @@ import java.util.function.Function; /** * ConfigProperty describes a configuration property. It contains the configuration * key, deprecated older versions of the key, and an optional default value for the configuration, - * configuration descriptions and also the an infer mechanism to infer the configuration value + * configuration descriptions and also an inferring mechanism to infer the configuration value * based on other configurations. * * @param <T> The type of the default value. @@ -45,6 +45,8 @@ public class ConfigProperty<T> implements Serializable { private final T defaultValue; + private final String docOnDefaultValue; + private final String doc; private final Option<String> sinceVersion; @@ -58,10 +60,13 @@ public class ConfigProperty<T> implements Serializable { // provide the ability to infer config value based on other configs private final Option<Function<HoodieConfig, Option<T>>> inferFunction; - ConfigProperty(String key, T defaultValue, String doc, Option<String> sinceVersion, - Option<String> deprecatedVersion, Option<Function<HoodieConfig, Option<T>>> inferFunc, Set<String> validValues, String... alternatives) { + ConfigProperty(String key, T defaultValue, String docOnDefaultValue, String doc, + Option<String> sinceVersion, Option<String> deprecatedVersion, + Option<Function<HoodieConfig, Option<T>>> inferFunc, Set<String> validValues, + String... alternatives) { this.key = Objects.requireNonNull(key); this.defaultValue = defaultValue; + this.docOnDefaultValue = docOnDefaultValue; this.doc = doc; this.sinceVersion = sinceVersion; this.deprecatedVersion = deprecatedVersion; @@ -85,6 +90,11 @@ public class ConfigProperty<T> implements Serializable { return defaultValue != null; } + public String getDocOnDefaultValue() { + return StringUtils.isNullOrEmpty(docOnDefaultValue) + ? StringUtils.EMPTY_STRING : docOnDefaultValue; + } + public String doc() { return StringUtils.isNullOrEmpty(doc) ? StringUtils.EMPTY_STRING : doc; } @@ -97,7 +107,11 @@ public class ConfigProperty<T> implements Serializable { return deprecatedVersion; } - Option<Function<HoodieConfig, Option<T>>> getInferFunc() { + public boolean hasInferFunction() { + return getInferFunction().isPresent(); + } + + Option<Function<HoodieConfig, Option<T>>> getInferFunction() { return inferFunction; } @@ -115,32 +129,32 @@ public class ConfigProperty<T> implements Serializable { public ConfigProperty<T> withDocumentation(String doc) { Objects.requireNonNull(doc); - return new ConfigProperty<>(key, defaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, validValues, alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, validValues, alternatives); } public ConfigProperty<T> withValidValues(String... validValues) { Objects.requireNonNull(validValues); - return new ConfigProperty<>(key, defaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, new HashSet<>(Arrays.asList(validValues)), alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, new HashSet<>(Arrays.asList(validValues)), alternatives); } public ConfigProperty<T> withAlternatives(String... alternatives) { Objects.requireNonNull(alternatives); - return new ConfigProperty<>(key, defaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, validValues, alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, sinceVersion, deprecatedVersion, inferFunction, validValues, alternatives); } public ConfigProperty<T> sinceVersion(String sinceVersion) { Objects.requireNonNull(sinceVersion); - return new ConfigProperty<>(key, defaultValue, doc, Option.of(sinceVersion), deprecatedVersion, inferFunction, validValues, alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, Option.of(sinceVersion), deprecatedVersion, inferFunction, validValues, alternatives); } public ConfigProperty<T> deprecatedAfter(String deprecatedVersion) { Objects.requireNonNull(deprecatedVersion); - return new ConfigProperty<>(key, defaultValue, doc, sinceVersion, Option.of(deprecatedVersion), inferFunction, validValues, alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, sinceVersion, Option.of(deprecatedVersion), inferFunction, validValues, alternatives); } public ConfigProperty<T> withInferFunction(Function<HoodieConfig, Option<T>> inferFunction) { Objects.requireNonNull(inferFunction); - return new ConfigProperty<>(key, defaultValue, doc, sinceVersion, deprecatedVersion, Option.of(inferFunction), validValues, alternatives); + return new ConfigProperty<>(key, defaultValue, docOnDefaultValue, doc, sinceVersion, deprecatedVersion, Option.of(inferFunction), validValues, alternatives); } /** @@ -174,13 +188,22 @@ public class ConfigProperty<T> implements Serializable { } public <T> ConfigProperty<T> defaultValue(T value) { + return defaultValue(value, ""); + } + + public <T> ConfigProperty<T> defaultValue(T value, String docOnDefaultValue) { Objects.requireNonNull(value); - ConfigProperty<T> configProperty = new ConfigProperty<>(key, value, "", Option.empty(), Option.empty(), Option.empty(), Collections.emptySet()); + Objects.requireNonNull(docOnDefaultValue); + ConfigProperty<T> configProperty = new ConfigProperty<>(key, value, docOnDefaultValue, "", Option.empty(), Option.empty(), Option.empty(), Collections.emptySet()); return configProperty; } public ConfigProperty<String> noDefaultValue() { - ConfigProperty<String> configProperty = new ConfigProperty<>(key, null, "", Option.empty(), + return noDefaultValue(""); + } + + public ConfigProperty<String> noDefaultValue(String docOnDefaultValue) { + ConfigProperty<String> configProperty = new ConfigProperty<>(key, null, docOnDefaultValue, "", Option.empty(), Option.empty(), Option.empty(), Collections.emptySet()); return configProperty; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java index 223b93e5744..672ea75457c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java @@ -64,11 +64,22 @@ public class HoodieConfig implements Serializable { props.putAll(properties); } + /** + * Sets the default value of a config if user does not set it already. + * The default value can only be set if the config property has a built-in + * default value or an infer function. When the infer function is present, + * the infer function is used first to derive the config value based on other + * configs. If the config value cannot be inferred, the built-in default value + * is used if present. + * + * @param configProperty Config to set a default value. + * @param <T> Data type of the config. + */ public <T> void setDefaultValue(ConfigProperty<T> configProperty) { if (!contains(configProperty)) { Option<T> inferValue = Option.empty(); - if (configProperty.getInferFunc().isPresent()) { - inferValue = configProperty.getInferFunc().get().apply(this); + if (configProperty.hasInferFunction()) { + inferValue = configProperty.getInferFunction().get().apply(this); } if (inferValue.isPresent() || configProperty.hasDefaultValue()) { props.setProperty( @@ -120,7 +131,7 @@ public class HoodieConfig implements Serializable { .forEach(f -> { try { ConfigProperty<?> cfgProp = (ConfigProperty<?>) f.get("null"); - if (cfgProp.hasDefaultValue() || cfgProp.getInferFunc().isPresent()) { + if (cfgProp.hasDefaultValue() || cfgProp.hasInferFunction()) { setDefaultValue(cfgProp); } } catch (IllegalAccessException e) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index c1f64d3baeb..51d44358fba 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -20,8 +20,6 @@ package org.apache.hudi.common.table; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex; -import org.apache.hudi.common.config.ConfigClassProperty; -import org.apache.hudi.common.config.ConfigGroups; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.OrderedProperties; @@ -75,14 +73,6 @@ import static org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode. * @see HoodieTableMetaClient * @since 0.3.0 */ -@ConfigClassProperty(name = "Table Configurations", - groupName = ConfigGroups.Names.WRITE_CLIENT, - description = "Configurations that persist across writes and read on a Hudi table " - + " like base, log file formats, table name, creation schema, table version layouts. " - + " Configurations are loaded from hoodie.properties, these properties are usually set during " - + "initializing a path as hoodie base path and rarely changes during " - + "the lifetime of the table. Writers/Queries' configurations are validated against these " - + " each time for compatibility.") public class HoodieTableConfig extends HoodieConfig { private static final Logger LOG = LogManager.getLogger(HoodieTableConfig.class); diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java index eb9affe3085..ba7523d1031 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java @@ -56,9 +56,10 @@ import static org.apache.hudi.common.table.HoodieTableConfig.URL_ENCODE_PARTITIO * Configs needed to sync data into external meta stores, catalogs, etc. */ @Immutable -@ConfigClassProperty(name = "Metadata Sync Configs", +@ConfigClassProperty(name = "Common Metadata Sync Configs", groupName = ConfigGroups.Names.META_SYNC, - description = "Configurations used by the Hudi to sync metadata to external metastores and catalogs.") + areCommonConfigs = true, + description = "") public class HoodieSyncConfig extends HoodieConfig { private static final Logger LOG = LogManager.getLogger(HoodieSyncConfig.class); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/kafka/HoodieWriteCommitKafkaCallbackConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/kafka/HoodieWriteCommitKafkaCallbackConfig.java index 0ec91e129cb..bcd6dd9665a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/kafka/HoodieWriteCommitKafkaCallbackConfig.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/kafka/HoodieWriteCommitKafkaCallbackConfig.java @@ -29,6 +29,7 @@ import static org.apache.hudi.config.HoodieWriteCommitCallbackConfig.CALLBACK_PR */ @ConfigClassProperty(name = "Write commit Kafka callback configs", groupName = ConfigGroups.Names.WRITE_CLIENT, + subGroupName = ConfigGroups.SubGroupNames.COMMIT_CALLBACK, description = "Controls notifications sent to Kafka, on events happening to a hudi table.") public class HoodieWriteCommitKafkaCallbackConfig extends HoodieConfig { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/pulsar/HoodieWriteCommitPulsarCallbackConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/pulsar/HoodieWriteCommitPulsarCallbackConfig.java index f185e6b5176..e777d5b66b8 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/pulsar/HoodieWriteCommitPulsarCallbackConfig.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/callback/pulsar/HoodieWriteCommitPulsarCallbackConfig.java @@ -29,6 +29,7 @@ import static org.apache.hudi.config.HoodieWriteCommitCallbackConfig.CALLBACK_PR */ @ConfigClassProperty(name = "Write commit pulsar callback configs", groupName = ConfigGroups.Names.WRITE_CLIENT, + subGroupName = ConfigGroups.SubGroupNames.COMMIT_CALLBACK, description = "Controls notifications sent to pulsar, on events happening to a hudi table.") public class HoodieWriteCommitPulsarCallbackConfig extends HoodieConfig {
