Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19803#discussion_r152938354
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala ---
@@ -108,20 +108,22 @@ trait CodegenSupport extends SparkPlan {
   /**
    * Consume the generated columns or row from current SparkPlan, call its parent's `doConsume()`.
+   *
+   * Note that `outputVars` and `row` can't both be null.
    */
   final def consume(ctx: CodegenContext, outputVars: Seq[ExprCode], row: String = null): String = {
     val inputVars =
-      if (row != null) {
+      if (outputVars != null) {
+        assert(outputVars.length == output.length)
+        // outputVars will be used to generate the code for UnsafeRow, so we should copy them
+        outputVars.map(_.copy())
+      } else {
+        assert(row != null, "outputVars and row cannot both be null.")
         ctx.currentVars = null
         ctx.INPUT_ROW = row
         output.zipWithIndex.map { case (attr, i) =>
           BoundReference(i, attr.dataType, attr.nullable).genCode(ctx)
         }
-      } else {
--- End diff --
correction: `FileSourceScanExec` may provide both `outputVars` and `row`, but its `outputVars` is exactly what we generate here, so there is still no behavior change.
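
To make the contract concrete, here is a minimal, self-contained sketch of the dispatch above. `pickInputVars` and `bindFromRow` are hypothetical stand-ins for illustration (the real logic lives inline in `consume`, and the real `ExprCode` is in `org.apache.spark.sql.catalyst.expressions.codegen`); it only shows that when both arguments are non-null, the `outputVars` branch wins, which is why a caller like `FileSourceScanExec` passing both sees no behavior change as long as its `outputVars` match what binding `row` would produce:

```scala
// Simplified stand-in: like Spark's ExprCode, the fields are mutable,
// which is why the outputVars branch copies each element below.
final case class ExprCode(var code: String, var isNull: String, var value: String)

// Hypothetical helper mirroring the `val inputVars = if (...) ... else ...`
// logic in consume(); `bindFromRow` stands in for the BoundReference codegen.
def pickInputVars(
    outputVars: Seq[ExprCode],
    row: String,
    bindFromRow: String => Seq[ExprCode]): Seq[ExprCode] = {
  if (outputVars != null) {
    // copy, so later mutation of the generated code does not leak elsewhere
    outputVars.map(_.copy())
  } else {
    assert(row != null, "outputVars and row cannot both be null.")
    bindFromRow(row)
  }
}

// A FileSourceScanExec-like call site: both arguments are non-null,
// the outputVars branch wins, and the row argument is simply ignored.
val fromRow = (r: String) => Seq(ExprCode("", "false", s"$r.getInt(0)"))
val vars = pickInputVars(Seq(ExprCode("", "false", "scan_value_0")), "scan_row", fromRow)
```

Run in any Scala REPL: `vars` comes out as copies of the supplied `outputVars`, and `fromRow` is never invoked.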
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]