xushiyan commented on a change in pull request #4565:
URL: https://github.com/apache/hudi/pull/4565#discussion_r785484692
##########
File path:
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -118,6 +123,18 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
rows
}
+ private def extractRequiredSchema(iter: Iterator[InternalRow]):
Iterator[InternalRow] = {
+ val tableAvroSchema = new Schema.Parser().parse(tableState.tableAvroSchema)
+ val requiredAvroSchema = new
Schema.Parser().parse(tableState.requiredAvroSchema)
Review comment:
Is this variable unused?
##########
File path:
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -118,6 +123,18 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
rows
}
+ private def extractRequiredSchema(iter: Iterator[InternalRow]):
Iterator[InternalRow] = {
Review comment:
The name could imply that this returns a schema.
```suggestion
private def toRowsWithRequiredSchema(iter: Iterator[InternalRow]):
Iterator[InternalRow] = {
```
##########
File path:
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -54,15 +54,20 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
private val preCombineField = tableState.preCombineField
private val recordKeyFieldOpt = tableState.recordKeyFieldOpt
private val payloadProps = if (preCombineField.isDefined) {
-
Some(HoodiePayloadConfig.newBuilder.withPayloadOrderingField(preCombineField.get).build.getProps)
+ val properties = HoodiePayloadConfig.newBuilder
+ .withPayloadOrderingField(preCombineField.get)
+ .withPayloadEventTimeField(preCombineField.get)
Review comment:
> setting returnNullIfNotFound to true does not take effect when call
HoodieAvroUtils.getNestedFieldVal
This looks like a bug in `getNestedFieldVal()`? Is it fixable in Spark 3.2?
##########
File path:
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -729,6 +733,12 @@ object HoodieSparkSqlWriter {
mergedParams(key) = value
}
}
+
+ // use preCombineField to fill in PAYLOAD_ORDERING_FIELD_PROP_KEY and
PAYLOAD_EVENT_TIME_FIELD_PROP_KEY
+ if (mergedParams.contains(PRECOMBINE_FIELD.key())) {
+ mergedParams.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY,
mergedParams(PRECOMBINE_FIELD.key()))
+ mergedParams.put(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY,
mergedParams(PRECOMBINE_FIELD.key()))
+ }
Review comment:
The expected scenario should be: when `preCombineField` is set,
`PAYLOAD_ORDERING_FIELD_PROP_KEY` is set automatically, and
`PAYLOAD_EVENT_TIME_FIELD_PROP_KEY` is never required. It would be good to fix
these along the way.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]