codope commented on code in PR #5854:
URL: https://github.com/apache/hudi/pull/5854#discussion_r912363955
##########
hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java:
##########
@@ -165,24 +140,85 @@ public class HoodieSyncConfig extends HoodieConfig {
.defaultValue("")
.withDocumentation("The spark version used when syncing with a
metastore.");
- public HoodieSyncConfig(TypedProperties props) {
+ private Configuration hadoopConf;
+
+ public HoodieSyncConfig(Properties props) {
+ this(props, ConfigUtils.createHadoopConf(props));
+ }
+
+ public HoodieSyncConfig(Properties props, Configuration hadoopConf) {
super(props);
- setDefaults();
-
- this.basePath = getStringOrDefault(META_SYNC_BASE_PATH);
- this.databaseName = getStringOrDefault(META_SYNC_DATABASE_NAME);
- this.tableName = getStringOrDefault(META_SYNC_TABLE_NAME);
- this.baseFileFormat = getStringOrDefault(META_SYNC_BASE_FILE_FORMAT);
- this.partitionFields =
props.getStringList(META_SYNC_PARTITION_FIELDS.key(), ",",
Collections.emptyList());
- this.partitionValueExtractorClass =
getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS);
- this.assumeDatePartitioning =
getBooleanOrDefault(META_SYNC_ASSUME_DATE_PARTITION);
- this.decodePartition =
getBooleanOrDefault(KeyGeneratorOptions.URL_ENCODE_PARTITIONING);
- this.useFileListingFromMetadata =
getBooleanOrDefault(META_SYNC_USE_FILE_LISTING_FROM_METADATA);
- this.isConditionalSync = getBooleanOrDefault(META_SYNC_CONDITIONAL_SYNC);
- this.sparkVersion = getStringOrDefault(META_SYNC_SPARK_VERSION);
+ this.hadoopConf = hadoopConf;
+ }
+
+ public void setHadoopConf(Configuration hadoopConf) {
+ this.hadoopConf = hadoopConf;
+ }
+
+ public Configuration getHadoopConf() {
+ return hadoopConf;
+ }
+
+ public FileSystem getHadoopFileSystem() {
+ return FSUtils.getFs(getString(META_SYNC_BASE_PATH), getHadoopConf());
}
- protected void setDefaults() {
- this.setDefaultValue(META_SYNC_TABLE_NAME);
+ public String getAbsoluteBasePath() {
+ return getString(META_SYNC_BASE_PATH);
+ }
+
+ @Override
+ public String toString() {
+ return props.toString();
+ }
+
+ public static class HoodieSyncConfigParams {
+ @Parameter(names = {"--database"}, description = "name of the target
database in meta store", required = true)
+ public String databaseName;
+ @Parameter(names = {"--table"}, description = "name of the target table in
meta store", required = true)
+ public String tableName;
+ @Parameter(names = {"--base-path"}, description = "Base path of the hoodie
table to sync", required = true)
+ public String basePath;
+ @Parameter(names = {"--base-file-format"}, description = "Format of the
base files (PARQUET (or) HFILE)")
+ public String baseFileFormat;
+ @Parameter(names = "--partitioned-by", description = "Fields in the schema
partitioned by")
+ public List<String> partitionFields;
+ @Parameter(names = "--partition-value-extractor", description = "Class
which implements PartitionValueExtractor "
+ + "to extract the partition values from HDFS path")
+ public String partitionValueExtractorClass;
+ @Parameter(names = {"--assume-date-partitioning"}, description = "Assume
standard yyyy/mm/dd partitioning, this"
+ + " exists to support backward compatibility. If you use hoodie 0.3.x,
do not set this parameter")
+ public Boolean assumeDatePartitioning;
+ @Parameter(names = {"--decode-partition"}, description = "Decode the
partition value if the partition has encoded during writing")
+ public Boolean decodePartition;
+ @Parameter(names = {"--use-file-listing-from-metadata"}, description =
"Fetch file listing from Hudi's metadata")
+ public Boolean useFileListingFromMetadata;
+ @Parameter(names = {"--conditional-sync"}, description = "If true, only
sync on conditions like schema change or partition change.")
+ public Boolean isConditionalSync;
+ @Parameter(names = {"--spark-version"}, description = "The spark version")
+ public String sparkVersion;
+
+ @Parameter(names = {"--help", "-h"}, help = true)
+ public boolean help = false;
+
+ public boolean isHelp() {
+ return help;
+ }
+
+ public TypedProperties toProps() {
+ final TypedProperties props = new TypedProperties();
+ props.setPropertyIfNonNull(META_SYNC_BASE_PATH.key(), basePath);
+ props.setPropertyIfNonNull(META_SYNC_DATABASE_NAME.key(), databaseName);
+ props.setPropertyIfNonNull(META_SYNC_TABLE_NAME.key(), tableName);
+ props.setPropertyIfNonNull(META_SYNC_BASE_FILE_FORMAT.key(),
baseFileFormat);
+ props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(),
StringUtils.join(",", partitionFields));
Review Comment:
Agreed, the proper default should be `noDefaultValue`. I am OK with a separate
PR for the default changes. Actually, that's preferable.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]