GitHub user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/8971#discussion_r41353513
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala ---
@@ -365,32 +374,33 @@ private[sql] case class FIXED_DECIMAL(precision: Int, scale: Int)
   }
 }

-private[sql] object FIXED_DECIMAL {
-  val defaultSize = 8
-}
+private[sql] sealed abstract class ByteArrayColumnType[JvmType](val defaultSize: Int)
+  extends ColumnType[JvmType] {

-private[sql] sealed abstract class ByteArrayColumnType(val defaultSize: Int)
-  extends ColumnType[Array[Byte]] {
+  def serialize(value: JvmType): Array[Byte]
+  def deserialize(bytes: Array[Byte]): JvmType

   override def actualSize(row: InternalRow, ordinal: Int): Int = {
-    getField(row, ordinal).length + 4
+    // TODO: grow the buffer in append(), so serialize() will not be called twice
+    serialize(getField(row, ordinal)).length + 4
   }

-  override def append(v: Array[Byte], buffer: ByteBuffer): Unit = {
-    buffer.putInt(v.length).put(v, 0, v.length)
+  override def append(v: JvmType, buffer: ByteBuffer): Unit = {
+    val bytes = serialize(v)
+    buffer.putInt(bytes.length).put(bytes, 0, bytes.length)
--- End diff --
Looks like only the binary type needs the length header? Struct, array, and map all have length info inside.
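
[Editor's note] To make the distinction concrete, here is a minimal, self-contained Scala sketch of the length-prefix framing under discussion. Everything below is hypothetical (it is not Spark's ColumnType API): a raw binary payload carries no size of its own, so the writer must put a 4-byte length header before the bytes for the reader to know where each value ends, whereas (as the comment notes) struct, array, and map serializations already embed their length and would not need the extra header.

import java.nio.ByteBuffer

// Hypothetical helper, not Spark's ColumnType API.
object LengthPrefixed {
  // Write a 4-byte length header followed by the payload
  // (same shape as the append() in the diff above).
  def append(bytes: Array[Byte], buffer: ByteBuffer): Unit =
    buffer.putInt(bytes.length).put(bytes, 0, bytes.length)

  // Read the header, then exactly that many payload bytes.
  def extract(buffer: ByteBuffer): Array[Byte] = {
    val bytes = new Array[Byte](buffer.getInt())
    buffer.get(bytes)
    bytes
  }

  def main(args: Array[String]): Unit = {
    val buffer = ByteBuffer.allocate(64)
    append("hello".getBytes("UTF-8"), buffer)
    append(Array[Byte](1, 2, 3), buffer)
    buffer.flip() // switch the buffer from writing to reading
    assert(new String(extract(buffer), "UTF-8") == "hello")
    assert(extract(buffer).sameElements(Array[Byte](1, 2, 3)))
  }
}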