Github user dbtsai commented on a diff in the pull request:
https://github.com/apache/spark/pull/21847#discussion_r206350423
--- Diff:
external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala ---
@@ -87,17 +87,30 @@ class AvroSerializer(rootCatalystType: DataType,
rootAvroType: Schema, nullable:
case d: DecimalType =>
(getter, ordinal) => getter.getDecimal(ordinal, d.precision,
d.scale).toString
case StringType =>
- (getter, ordinal) => new
Utf8(getter.getUTF8String(ordinal).getBytes)
+ if (avroType.getType == Type.ENUM) {
+ (getter, ordinal) =>
+ new EnumSymbol(avroType,
getter.getUTF8String(ordinal).toString)
+ } else {
+ (getter, ordinal) =>
+ new Utf8(getter.getUTF8String(ordinal).getBytes)
+ }
case BinaryType =>
- (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal))
+ if (avroType.getType == Type.FIXED) {
+ // Handles fixed-type fields in output schema. Test case is
included in test.avro
+ // as it includes several fixed fields that would fail if we
specify schema
+ // on-write without this condition
+ (getter, ordinal) => new Fixed(avroType,
getter.getBinary(ordinal))
+ } else {
+ (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal))
+ }
case DateType =>
(getter, ordinal) => getter.getInt(ordinal) *
DateTimeUtils.MILLIS_PER_DAY
case TimestampType =>
(getter, ordinal) => getter.getLong(ordinal) / 1000
case ArrayType(et, containsNull) =>
val elementConverter = newConverter(
- et, resolveNullableType(avroType.getElementType, containsNull))
+ et, resolveUnionType(avroType.getElementType, et, containsNull))
(getter, ordinal) => {
--- End diff --
With `if (containsNull && arrayData.isNullAt(i))`, the JVM can eliminate the
null-check branch when the array elements are not nullable (i.e. when
`containsNull` is false). We can also use an `Array` instead of an
`ArrayBuffer` to get better performance.
```scala
(getter, ordinal) => {
val arrayData = getter.getArray(ordinal)
val length = arrayData.numElements()
val result = new Array[Any](length)
var i = 0
while (i < length) {
if (containsNull && arrayData.isNullAt(i)) {
result(i) = null
} else {
result(i) = elementConverter(arrayData, i)
}
i += 1
}
result
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]