umehrot2 commented on a change in pull request #3491:
URL: https://github.com/apache/hudi/pull/3491#discussion_r690820344



##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java
##########
@@ -107,8 +105,80 @@
       .defaultValue(HFileBootstrapIndex.class.getName())
       .sinceVersion("0.6.0")

Review comment:
       Rename to `BOOTSTRAP_INDEX_CLASS_NAME` ? Since we are changing all 
`_CLASS` to `_CLASS_NAME` ?

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java
##########
@@ -49,22 +49,20 @@
       .noDefaultValue()
       .sinceVersion("0.6.0")
       .withDocumentation("Base path of the dataset that needs to be 
bootstrapped as a Hudi table");
-  @Deprecated
-  public static final String BOOTSTRAP_BASE_PATH_PROP = 
BOOTSTRAP_BASE_PATH.key();
 
-  public static final ConfigProperty<String> BOOTSTRAP_MODE_SELECTOR = 
ConfigProperty
+  public static final ConfigProperty<String> BOOTSTRAP_MODE_SELECTOR_CLASS = 
ConfigProperty
       .key("hoodie.bootstrap.mode.selector")
       .defaultValue(MetadataOnlyBootstrapModeSelector.class.getCanonicalName())
       .sinceVersion("0.6.0")
       .withDocumentation("Selects the mode in which each file/partition in the 
bootstrapped dataset gets bootstrapped");
 
-  public static final ConfigProperty<String> FULL_BOOTSTRAP_INPUT_PROVIDER = 
ConfigProperty
+  public static final ConfigProperty<String> 
FULL_BOOTSTRAP_INPUT_PROVIDER_CLASS = ConfigProperty

Review comment:
       Same here. Shall we choose between `_CLASS` and `_CLASS_NAME` and make 
that consistent ?

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java
##########
@@ -90,33 +86,33 @@
           .sinceVersion("0.9.0")
           .withDocumentation("Number of partitions to skip from latest when 
choosing partitions to create ClusteringPlan");
 
-  public static final ConfigProperty<String> CLUSTERING_PLAN_SMALL_FILE_LIMIT 
= ConfigProperty
+  public static final ConfigProperty<String> 
CLUSTERING_STRATEGY_SMALL_FILE_LIMIT = ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "small.file.limit")
       .defaultValue(String.valueOf(600 * 1024 * 1024L))
       .sinceVersion("0.7.0")
       .withDocumentation("Files smaller than the size specified here are 
candidates for clustering");
 
-  public static final ConfigProperty<String> CLUSTERING_MAX_BYTES_PER_GROUP = 
ConfigProperty
+  public static final ConfigProperty<String> 
CLUSTERING_STRATEGY_MAX_BYTES_PER_OUTPUT_FILEGROUP = ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "max.bytes.per.group")
       .defaultValue(String.valueOf(2 * 1024 * 1024 * 1024L))
       .sinceVersion("0.7.0")
       .withDocumentation("Each clustering operation can create multiple output 
file groups. Total amount of data processed by clustering operation"
           + " is defined by below two properties 
(CLUSTERING_MAX_BYTES_PER_GROUP * CLUSTERING_MAX_NUM_GROUPS)."
           + " Max amount of data to be included in one group");
 
-  public static final ConfigProperty<String> CLUSTERING_MAX_NUM_GROUPS = 
ConfigProperty
+  public static final ConfigProperty<String> CLUSTERING_STRATEGY_MAX_GROUPS = 
ConfigProperty

Review comment:
       Shall we make this consistent, either `FILEGROUP` or `GROUP` ?

##########
File path: 
hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
##########
@@ -61,11 +61,11 @@ public DataSourceReader createReader(DataSourceOptions 
options) {
       DataSourceOptions options) {
     String instantTime = 
options.get(DataSourceInternalWriterHelper.INSTANT_TIME_OPT_KEY).get();
     String path = options.get("path").get();
-    String tblName = options.get(HoodieWriteConfig.TABLE_NAME.key()).get();
+    String tblName = 
options.get(HoodieWriteConfig.TABLE_NAME_VALUE.key()).get();

Review comment:
       `TABLE_NAME_VALUE` does not seem very intuitive to me. And then doing 
`TABLE_NAME_VALUE.key()`.
   
   I wonder if using `TABLE_NAME_CFG` or `TABLE_NAME_CONF` was actually better, 
so we don't have think about coming up with alternative names and instead can 
just append this suffix to differentiate ? Just a thought.

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java
##########
@@ -90,33 +86,33 @@
           .sinceVersion("0.9.0")
           .withDocumentation("Number of partitions to skip from latest when 
choosing partitions to create ClusteringPlan");
 
-  public static final ConfigProperty<String> CLUSTERING_PLAN_SMALL_FILE_LIMIT 
= ConfigProperty
+  public static final ConfigProperty<String> 
CLUSTERING_STRATEGY_SMALL_FILE_LIMIT = ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "small.file.limit")
       .defaultValue(String.valueOf(600 * 1024 * 1024L))
       .sinceVersion("0.7.0")
       .withDocumentation("Files smaller than the size specified here are 
candidates for clustering");
 
-  public static final ConfigProperty<String> CLUSTERING_MAX_BYTES_PER_GROUP = 
ConfigProperty
+  public static final ConfigProperty<String> 
CLUSTERING_STRATEGY_MAX_BYTES_PER_OUTPUT_FILEGROUP = ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "max.bytes.per.group")
       .defaultValue(String.valueOf(2 * 1024 * 1024 * 1024L))
       .sinceVersion("0.7.0")
       .withDocumentation("Each clustering operation can create multiple output 
file groups. Total amount of data processed by clustering operation"
           + " is defined by below two properties 
(CLUSTERING_MAX_BYTES_PER_GROUP * CLUSTERING_MAX_NUM_GROUPS)."
           + " Max amount of data to be included in one group");
 
-  public static final ConfigProperty<String> CLUSTERING_MAX_NUM_GROUPS = 
ConfigProperty
+  public static final ConfigProperty<String> CLUSTERING_STRATEGY_MAX_GROUPS = 
ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "max.num.groups")
       .defaultValue("30")
       .sinceVersion("0.7.0")
       .withDocumentation("Maximum number of groups to create as part of 
ClusteringPlan. Increasing groups will increase parallelism");
 
-  public static final ConfigProperty<String> CLUSTERING_TARGET_FILE_MAX_BYTES 
= ConfigProperty
+  public static final ConfigProperty<String> 
CLUSTERING_STRATEGY_TARGET_FILE_MAX_BYTES = ConfigProperty
       .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "target.file.max.bytes")
       .defaultValue(String.valueOf(1024 * 1024 * 1024L))
       .sinceVersion("0.7.0")
       .withDocumentation("Each group can produce 'N' 
(CLUSTERING_MAX_GROUP_SIZE/CLUSTERING_TARGET_FILE_SIZE) output file groups");
 
-  public static final ConfigProperty<String> CLUSTERING_SORT_COLUMNS_PROPERTY 
= ConfigProperty
+  public static final ConfigProperty<String> CLUSTERING_SORT_COLUMNS = 
ConfigProperty

Review comment:
       This also has `CLUSTERING_STRATEGY_PARAM_PREFIX` in its key. Should this 
be `CLUSTERING_STRATEGY_SORT_COLUMNS` ?

##########
File path: 
hudi-spark-datasource/hudi-spark2/src/main/java/org/apache/hudi/internal/DefaultSource.java
##########
@@ -61,11 +61,11 @@ public DataSourceReader createReader(DataSourceOptions 
options) {
       DataSourceOptions options) {
     String instantTime = 
options.get(DataSourceInternalWriterHelper.INSTANT_TIME_OPT_KEY).get();
     String path = options.get("path").get();
-    String tblName = options.get(HoodieWriteConfig.TABLE_NAME.key()).get();
+    String tblName = 
options.get(HoodieWriteConfig.TABLE_NAME_VALUE.key()).get();

Review comment:
       `TBL_NAME` ?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to