xushiyan commented on code in PR #7901:
URL: https://github.com/apache/hudi/pull/7901#discussion_r1118241518
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -120,16 +121,19 @@ object HoodieSparkSqlWriter {
val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
tableExists = fs.exists(new Path(basePath,
HoodieTableMetaClient.METAFOLDER_NAME))
- var tableConfig = getHoodieTableConfig(sparkContext, path,
hoodieTableConfigOpt)
- val (parameters, hoodieConfig) = mergeParamsAndGetHoodieConfig(optParams,
tableConfig, mode)
- val originKeyGeneratorClassName =
HoodieWriterUtils.getOriginKeyGenerator(parameters)
+ var tableConfig = if (mode == SaveMode.Overwrite || !tableExists) null
else getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
Review Comment:
`tableExists` is already handled inside `getHoodieTableConfig`. Also, it's too
early to check the `SaveMode` here: it leads to confusion about why `tableConfig`
is null while on storage it's non-null.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -86,6 +79,20 @@ object HoodieWriterUtils {
hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED)
+ if (isFreshTable) { // only set default values for a fresh table. these
might be used for config validation in subsequent commits and hence
+ // we should not be setting any defaults. (infer function could result
in setting wrong defaults)
Review Comment:
> should not be setting any defaults.
This comment is confusing — aren't you setting defaults below?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -658,7 +662,8 @@ object HoodieSparkSqlWriter {
val sparkContext = sqlContext.sparkContext
val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
tableExists = fs.exists(new Path(basePath,
HoodieTableMetaClient.METAFOLDER_NAME))
- val tableConfig = getHoodieTableConfig(sparkContext, path,
hoodieTableConfigOpt)
+ if (mode == SaveMode.Overwrite || !tableExists) null else
getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
+ val tableConfig = if (mode == SaveMode.Overwrite || !tableExists) null
else getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
Review Comment:
There is some repeated logic here; we should try to refactor it out.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -1021,17 +1026,20 @@ object HoodieSparkSqlWriter {
private def mergeParamsAndGetHoodieConfig(optParams: Map[String, String],
tableConfig: HoodieTableConfig,
mode: SaveMode): (Map[String, String], HoodieConfig) = {
val translatedOptions =
DataSourceWriteOptions.translateSqlOptions(optParams)
- val mergedParams = mutable.Map.empty ++
HoodieWriterUtils.parametersWithWriteDefaults(translatedOptions)
+ val mergedParams = mutable.Map.empty ++
HoodieWriterUtils.parametersWithWriteDefaults(translatedOptions, tableConfig ==
null)
Review Comment:
It is unintuitive that `tableConfig == null` indicates a fresh table (or a
table about to be initialized/re-written); consider a more explicit signal.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]