xushiyan commented on code in PR #7901:
URL: https://github.com/apache/hudi/pull/7901#discussion_r1120559992


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala:
##########
@@ -75,7 +75,7 @@ trait ProvidesHoodieConfig extends Logging {
      HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.getBoolean(HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE).toString
     )
 
-    val overridingOpts = Map[String, String](
+    var overridingOpts = Map[String, String](

Review Comment:
   can we stick with `val` instead of `var`? it's safer and less error-prone
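   for example, if the `var` is only there so an optional entry can be added afterwards, the optional entry can be appended at construction time instead. a minimal sketch with stand-in values (not the actual options built in this method), just to illustrate keeping the binding immutable:
   
```scala
// stand-in values only, to illustrate the pattern
val preCombineField: String = null // e.g. no precombine key configured

val overridingOpts: Map[String, String] = Map(
  "path" -> "/tmp/hudi_tbl",
  "hoodie.table.name" -> "hudi_tbl"
) ++ Option(preCombineField).map(f => "hoodie.datasource.write.precombine.field" -> f)
```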



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -55,9 +51,6 @@ object HoodieWriterUtils {
     val hoodieConfig: HoodieConfig = new HoodieConfig(props)
     hoodieConfig.setDefaultValue(OPERATION)
     hoodieConfig.setDefaultValue(TABLE_TYPE)
-    hoodieConfig.setDefaultValue(PRECOMBINE_FIELD)
-    hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME)
-    hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME)

Review Comment:
   unnecessary move



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -1024,8 +1029,9 @@ object HoodieSparkSqlWriter {
 
   private def getHoodieTableConfig(sparkContext: SparkContext,

Review Comment:
   a javadoc to explain the usage would help demystify it - it's internally more complicated than the name `getHoodieTableConfig` suggests
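   even a short scaladoc stub would help, e.g. (placeholder wording only - I haven't verified the exact behavior, so please describe it from the implementation):
   
```scala
/**
 * TODO(PR author): document what this resolves - e.g. whether it loads the
 * existing table config from the base path, what it returns when the table
 * does not exist yet, and how the passed-in options interact with it.
 * (placeholder questions only, not a description of the actual behavior)
 */
```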



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -86,6 +79,17 @@ object HoodieWriterUtils {
     hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
     hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
     hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED)
+    hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME)
+    hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME)
+    Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters)
+  }
+
+  def getParamsWithAlternatives(parameters: Map[String, String]): Map[String, String] = {
+    val globalProps = DFSPropertiesConfiguration.getGlobalProps.asScala
+    val props = new Properties()
+    props.putAll(parameters)
+    val hoodieConfig: HoodieConfig = new HoodieConfig(props)
+    // do not set any default as this is called before validation.

Review Comment:
   renaming `parameters` to `overridingParams` makes the logic clearer. this applies to `parametersWithWriteDefaults()` too.
   
   `WithAlternatives` is not clear enough. did you mean the datasource's shorter form of the option keys? this looks overlapping with `getParamsWithoutDefaults()`, which also translates configs. can we merge these 2?
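   purely as an illustration of the rename (the body stays as in this PR); whether it can be folded into `getParamsWithoutDefaults()` depends on what else that method does beyond the translation step:
   
```scala
// naming sketch only - not a behavior change
def getParamsWithAlternatives(overridingParams: Map[String, String]): Map[String, String] = ???
```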



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala:
##########
@@ -117,10 +121,7 @@ trait ProvidesHoodieConfig extends Logging {
 
     val partitionFieldsStr = hoodieCatalogTable.partitionFields.mkString(",")
 
-    // NOTE: Here we fallback to "" to make sure that null value is not overridden with
-    // default value ("ts")
-    // TODO(HUDI-3456) clean up
-    val preCombineField = hoodieCatalogTable.preCombineKey.getOrElse("")
+    val preCombineField = hoodieCatalogTable.preCombineKey.getOrElse(null)

Review Comment:
   does this cause a bug? or is it just a clean-up after we make preCombineKey optional?



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala:
##########
@@ -197,17 +198,19 @@ trait ProvidesHoodieConfig extends Logging {
      HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.getStringOrDefault(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS)
     )
 
-    val overridingOpts = extraOptions ++ Map(
+    var overridingOpts = extraOptions ++ Map(
       "path" -> path,
       TABLE_TYPE.key -> tableType,
       TBL_NAME.key -> hoodieCatalogTable.tableName,
       OPERATION.key -> operation,
       HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable,
       URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning,
       RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","),
-      PRECOMBINE_FIELD.key -> preCombineField,
       PARTITIONPATH_FIELD.key -> partitionFieldsStr
     )
+    if (preCombineField != null) {
+      overridingOpts = overridingOpts ++ Map(PRECOMBINE_FIELD.key -> preCombineField)
+    }

Review Comment:
   this is actually not needed - `combineOptions()` filters out null values
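   i.e. the entry can stay inline and the binding can stay a `val` (sketch against the hunk above, assuming `combineOptions()` really drops null-valued entries):
   
```scala
val overridingOpts = extraOptions ++ Map(
  "path" -> path,
  TABLE_TYPE.key -> tableType,
  TBL_NAME.key -> hoodieCatalogTable.tableName,
  OPERATION.key -> operation,
  HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable,
  URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning,
  RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","),
  PRECOMBINE_FIELD.key -> preCombineField, // may be null here; filtered out downstream
  PARTITIONPATH_FIELD.key -> partitionFieldsStr
)
```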



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -86,6 +79,17 @@ object HoodieWriterUtils {
     hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
     hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
     hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED)
+    hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME)
+    hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME)
+    Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters)
+  }
+
+  def getParamsWithAlternatives(parameters: Map[String, String]): Map[String, String] = {
+    val globalProps = DFSPropertiesConfiguration.getGlobalProps.asScala
+    val props = new Properties()
+    props.putAll(parameters)
+    val hoodieConfig: HoodieConfig = new HoodieConfig(props)
+    // do not set any default as this is called before validation.

Review Comment:
   move this into the javadoc so it's clear to devs when to use it
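   e.g. a short scaladoc right above the `def`, carrying that note:
   
```scala
/**
 * NOTE: intentionally applies no config defaults, because this is called
 * before validation.
 * TODO(PR author): add a sentence on what "alternatives" covers here.
 */
```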



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -150,6 +154,10 @@ object HoodieWriterUtils {
          && datasourcePreCombineKey != tableConfigPreCombineKey) {
          diffConfigs.append(s"PreCombineKey:\t$datasourcePreCombineKey\t$tableConfigPreCombineKey\n")
         }
+        // not set in tableConfig, but incoming has some valid value
+        if (tableConfigPreCombineKey == null && datasourcePreCombineKey != null) {

Review Comment:
   this can be merged with the above condition check, right?
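   sketch of the merged check (assuming the part of the guard cut off in the context above only requires a non-null datasource key; when the table config has no precombine key but the incoming write does, the two values already compare unequal):
   
```scala
if (datasourcePreCombineKey != null
  && datasourcePreCombineKey != tableConfigPreCombineKey) {
  diffConfigs.append(s"PreCombineKey:\t$datasourcePreCombineKey\t$tableConfigPreCombineKey\n")
}
```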


