msamirkhan commented on a change in pull request #29354:
URL: https://github.com/apache/spark/pull/29354#discussion_r466063541
##########
File path:
external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
##########
@@ -367,15 +372,45 @@ class AvroDeserializer(
}
}
- private def createArrayData(elementType: DataType, length: Int): ArrayData =
elementType match {
- case BooleanType => UnsafeArrayData.fromPrimitiveArray(new
Array[Boolean](length))
- case ByteType => UnsafeArrayData.fromPrimitiveArray(new
Array[Byte](length))
- case ShortType => UnsafeArrayData.fromPrimitiveArray(new
Array[Short](length))
- case IntegerType => UnsafeArrayData.fromPrimitiveArray(new
Array[Int](length))
- case LongType => UnsafeArrayData.fromPrimitiveArray(new
Array[Long](length))
- case FloatType => UnsafeArrayData.fromPrimitiveArray(new
Array[Float](length))
- case DoubleType => UnsafeArrayData.fromPrimitiveArray(new
Array[Double](length))
- case _ => new GenericArrayData(new Array[Any](length))
+ private def getArrayDataCreator(elementType: DataType): Int => ArrayData =
elementType match {
+ case BooleanType => length => UnsafeArrayData.createFreshArray(length, 1)
+ case ByteType => length => UnsafeArrayData.createFreshArray(length, 1)
+ case ShortType => length => UnsafeArrayData.createFreshArray(length, 2)
+ case IntegerType => length => UnsafeArrayData.createFreshArray(length, 4)
+ case LongType => length => UnsafeArrayData.createFreshArray(length, 8)
+ case FloatType => length => UnsafeArrayData.createFreshArray(length, 4)
+ case DoubleType => length => UnsafeArrayData.createFreshArray(length, 8)
+ case _ => length => new GenericArrayData(new Array[Any](length))
+ }
+
+ private def getRowCreator(st: StructType): () => InternalRow = {
+ val constructorsArray = new Array[Unit => MutableValue](st.fields.length)
+ var i = 0
+ while (i < st.fields.length) {
+ st.fields(i).dataType match {
+ case BooleanType => constructorsArray(i) = _ => new MutableBoolean
+ case ByteType => constructorsArray(i) = _ => new MutableByte
+ case ShortType => constructorsArray(i) = _ => new MutableShort
+ // We use INT for DATE internally
+ case IntegerType | DateType => constructorsArray(i) = _ => new
MutableInt
+ // We use Long for Timestamp internally
+ case LongType | TimestampType => constructorsArray(i) = _ => new
MutableLong
+ case FloatType => constructorsArray(i) = _ => new MutableFloat
+ case DoubleType => constructorsArray(i) = _ => new MutableDouble
+ case _ => constructorsArray(i) = _ => new MutableAny
+ }
+ i += 1
+ }
+
+ () => {
+ val array = new Array[MutableValue](constructorsArray.length)
+ var i = 0
+ while (i < constructorsArray.length) {
+ array(i) = constructorsArray(i)(Unit)
+ i += 1
+ }
+ new SpecificInternalRow(array)
+ }
Review comment:
The SpecificInternalRow constructor changes are in a separate PR:
https://github.com/apache/spark/pull/29366
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]