Github user gengliangwang commented on a diff in the pull request:
https://github.com/apache/spark/pull/21948#discussion_r207235470
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala
---
@@ -113,11 +109,15 @@ object DataWritingSparkTask extends Logging {
val attemptId = context.attemptNumber()
val epochId =
Option(context.getLocalProperty(MicroBatchExecution.BATCH_ID_KEY)).getOrElse("0")
val dataWriter = writeTask.createDataWriter(partId, taskId, epochId.toLong)
+ val copyIfNeeded: InternalRow => InternalRow =
+ if (writeTask.reuseDataObject()) identity else _.copy()
// write the data and commit this writer.
Utils.tryWithSafeFinallyAndFailureCallbacks(block = {
while (iter.hasNext) {
- dataWriter.write(iter.next())
+ // Internally Spark reuse the same UnsafeRow instance when producing output rows, here we
--- End diff --
nit: reuse => reuses
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]