Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/20850#discussion_r175657704
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala
---
@@ -178,81 +171,76 @@ object InterpretedUnsafeProjection extends
UnsafeProjectionCreator {
case StructType(fields) =>
val numFields = fields.length
- val rowWriter = new UnsafeRowWriter(bufferHolder, numFields)
- val structWriter = generateStructWriter(bufferHolder, rowWriter,
fields)
+ val rowWriter = new UnsafeRowWriter(writer, numFields)
+ val structWriter = generateStructWriter(rowWriter, fields)
(v, i) => {
- val tmpCursor = bufferHolder.cursor
+ rowWriter.markCursor()
v.getStruct(i, fields.length) match {
case row: UnsafeRow =>
writeUnsafeData(
- bufferHolder,
+ rowWriter,
row.getBaseObject,
row.getBaseOffset,
row.getSizeInBytes)
case row =>
// Nested struct. We don't know where this will start
because a row can be
// variable length, so we need to update the offsets and
zero out the bit mask.
- rowWriter.reset()
+ rowWriter.resetRowWriter()
structWriter.apply(row)
}
- writer.setOffsetAndSize(i, tmpCursor, bufferHolder.cursor -
tmpCursor)
+ writer.setOffsetAndSizeFromMark(i)
}
case ArrayType(elementType, containsNull) =>
- val arrayWriter = new UnsafeArrayWriter
- val elementSize = getElementSize(elementType)
+ val arrayWriter = new UnsafeArrayWriter(writer,
getElementSize(elementType))
val elementWriter = generateFieldWriter(
- bufferHolder,
arrayWriter,
elementType,
containsNull)
(v, i) => {
- val tmpCursor = bufferHolder.cursor
- writeArray(bufferHolder, arrayWriter, elementWriter,
v.getArray(i), elementSize)
- writer.setOffsetAndSize(i, tmpCursor, bufferHolder.cursor -
tmpCursor)
+ arrayWriter.markCursor()
--- End diff --
From the performance view, this abstraction may have more performance
impact since we move temporal value on local frame into [that on Java
stack](https://github.com/apache/spark/pull/20850/files#diff-e68c5a074209b9a20ee2aa42936571ceR103)
```
arrayWriter.markCursor()
writeArray(arrayWriter, elementWriter, v.getArray(i))
writer.setOffsetAndSizeFromMark(i)
```
Is this implementation enough from the balance of performance and
abstraction? Or, is it better to do like this?
```
val mark = arrayWriter.cursor()
writeArray(arrayWriter, elementWriter, v.getArray(i))
writer.setOffsetAndSizeFromMark(i, mark)
```
@maropo @hvanhovell WDYT?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]