Github user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/21847#discussion_r205685728 --- Diff: external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala --- @@ -165,16 +183,112 @@ class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: result } - private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { - if (nullable) { - // avro uses union to represent nullable type. - val fields = avroType.getTypes.asScala - assert(fields.length == 2) - val actualType = fields.filter(_.getType != NULL) - assert(actualType.length == 1) - actualType.head + // Resolve an Avro union against a supplied DataType, i.e. a LongType compared against + // a ["null", "long"] should return a schema of type Schema.Type.LONG + // This function also handles resolving a DataType against unions of 2 or more types, i.e. + // an IntType resolves against a ["int", "long", "null"] will correctly return a schema of + // type Schema.Type.LONG + private def resolveNullableType(avroType: Schema, catalystType: DataType, + nullable: Boolean): Schema = { + (nullable, avroType.getType) match { + case (false, Type.UNION) | (true, Type.UNION) => + // avro uses union to represent nullable type. + val fieldTypes = avroType.getTypes.asScala + + // If we're nullable, we need to have at least two types. Cases with more than two types + // are captured in test("read read-write, read-write w/ schema, read") w/ test.avro input + assert(fieldTypes.length >= 2) --- End diff -- When it's non-nullable, is it possible to have `fieldTypes.length == 1`?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org