xushiyan commented on code in PR #7783:
URL: https://github.com/apache/hudi/pull/7783#discussion_r1089845455
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala:
##########
@@ -448,7 +448,7 @@ object DataSourceWriteOptions {
val STREAMING_CHECKPOINT_IDENTIFIER: ConfigProperty[String] = ConfigProperty
.key("hoodie.datasource.write.streaming.checkpoint.identifier")
- .noDefaultValue()
+ .defaultValue("default_single_writer")
Review Comment:
Should this value be prefixed with `hoodie`?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala:
##########
@@ -225,6 +227,27 @@ class HoodieStreamingSink(sqlContext: SQLContext,
}
}
+ private def validateMultiWriterConfigs(options: Map[String, String]) : Unit
= {
+ // to be filled
+ if
(WriteConcurrencyMode.valueOf(options.getOrDefault(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(),
+ HoodieWriteConfig.WRITE_CONCURRENCY_MODE.defaultValue())) ==
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) {
Review Comment:
nit: use a static import for `WRITE_CONCURRENCY_MODE` to avoid lengthy code.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala:
##########
@@ -225,6 +227,27 @@ class HoodieStreamingSink(sqlContext: SQLContext,
}
}
+ private def validateMultiWriterConfigs(options: Map[String, String]) : Unit
= {
+ // to be filled
+ if
(WriteConcurrencyMode.valueOf(options.getOrDefault(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(),
+ HoodieWriteConfig.WRITE_CONCURRENCY_MODE.defaultValue())) ==
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) {
+ // ensure some valid value is set for identifier
+ assert(!options.contains(STREAMING_CHECKPOINT_IDENTIFIER), "For
multi-writer scenarios, please set "
Review Comment:
This should use `checkState()` to throw a validation exception rather than a
generic `assert`.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala:
##########
@@ -107,6 +107,8 @@ class HoodieStreamingSink(sqlContext: SQLContext,
return
}
+ validateMultiWriterConfigs(options)
Review Comment:
Can we avoid running this validation on every invocation? Is there an init method
hook, or a common place where we already run some writer config validation?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]