gengliangwang commented on a change in pull request #31490:
URL: https://github.com/apache/spark/pull/31490#discussion_r658770562
##########
File path:
external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
##########
@@ -335,36 +336,32 @@ private[sql] class AvroDeserializer(
avroPath: Seq[String],
catalystPath: Seq[String],
applyFilters: Int => Boolean): (CatalystDataUpdater, GenericRecord) => Boolean = {
- val validFieldIndexes = ArrayBuffer.empty[Int]
- val fieldWriters = ArrayBuffer.empty[(CatalystDataUpdater, Any) => Unit]
-
- val avroSchemaHelper = new AvroUtils.AvroSchemaHelper(avroType, avroPath)
- val length = catalystType.length
- var i = 0
- while (i < length) {
- val catalystField = catalystType.fields(i)
- avroSchemaHelper.getFieldByName(catalystField.name) match {
- case Some(avroField) =>
- validFieldIndexes += avroField.pos()
+ val avroSchemaHelper =
+ new AvroUtils.AvroSchemaHelper(avroType, catalystType, avroPath, positionalFieldMatch)
+
+ avroSchemaHelper.getCatalystFieldsWithoutMatch.filterNot(_.nullable) match {
Review comment:
Why do we need this step, which goes over the two schemas? We can just
check that the two schemas have the same length and that every Avro field
has a corresponding Catalyst field in the loop below.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]