GitHub user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/21847#discussion_r206350423 --- Diff: external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala --- @@ -87,17 +87,30 @@ class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: case d: DecimalType => (getter, ordinal) => getter.getDecimal(ordinal, d.precision, d.scale).toString case StringType => - (getter, ordinal) => new Utf8(getter.getUTF8String(ordinal).getBytes) + if (avroType.getType == Type.ENUM) { + (getter, ordinal) => + new EnumSymbol(avroType, getter.getUTF8String(ordinal).toString) + } else { + (getter, ordinal) => + new Utf8(getter.getUTF8String(ordinal).getBytes) + } case BinaryType => - (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + if (avroType.getType == Type.FIXED) { + // Handles fixed-type fields in output schema. Test case is included in test.avro + // as it includes several fixed fields that would fail if we specify schema + // on-write without this condition + (getter, ordinal) => new Fixed(avroType, getter.getBinary(ordinal)) + } else { + (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + } case DateType => (getter, ordinal) => getter.getInt(ordinal) * DateTimeUtils.MILLIS_PER_DAY case TimestampType => (getter, ordinal) => getter.getLong(ordinal) / 1000 case ArrayType(et, containsNull) => val elementConverter = newConverter( - et, resolveNullableType(avroType.getElementType, containsNull)) + et, resolveUnionType(avroType.getElementType, et, containsNull)) (getter, ordinal) => { --- End diff -- With `if (containsNull && arrayData.isNullAt(i))`, the JVM can remove the branching when the array elements are not nullable. We can also use an `Array` instead of an `ArrayBuffer` to get better performance:
```scala
(getter, ordinal) => {
  val arrayData = getter.getArray(ordinal)
  val length = arrayData.numElements()
  val result = new Array[Any](length)
  var i = 0
  while (i < length) {
    if (containsNull && arrayData.isNullAt(i)) {
      result(i) = null
    } else {
      result(i) = elementConverter(arrayData, i)
    }
    i += 1
  }
  result
}
```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org