n3nash commented on a change in pull request #2927:
URL: https://github.com/apache/hudi/pull/2927#discussion_r630725611
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala
##########
@@ -95,17 +96,38 @@ object HoodieSparkUtils {
createRdd(df, avroSchema, structName, recordNamespace)
}
- def createRdd(df: DataFrame, avroSchema: Schema, structName: String,
recordNamespace: String)
+ def createRdd(df: DataFrame, latestSchema: Schema, structName: String,
recordNamespace: String): RDD[GenericRecord] = {
+ val avroSchema =
AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName,
recordNamespace)
+ // if schema generated from df.schema is same as latest schema, no special
handling is required.
+ if(TableSchemaResolver.isSchemaEquals(avroSchema, latestSchema)) {
+ createRdd(df, avroSchema, null, structName, recordNamespace)
+ } else { // if not, it means that table schema got evolved, but this batch
of records were generated with an older
+ // schema.
+ createRdd(df, avroSchema, latestSchema, structName, recordNamespace)
+ }
+ }
+
+ def createRdd(df: DataFrame, avroSchema: Schema, latestSchema: Schema,
structName: String, recordNamespace: String)
Review comment:
Is this method meant to be private? If so, please mark it private so that
it cannot be invoked from other code paths.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org