viirya commented on code in PR #41839:
URL: https://github.com/apache/spark/pull/41839#discussion_r1252548601
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala:
##########
@@ -89,15 +87,18 @@ case class ColumnarToRowExec(child: SparkPlan) extends
ColumnarToRowTransition w
override def doExecute(): RDD[InternalRow] = {
val numOutputRows = longMetric("numOutputRows")
val numInputBatches = longMetric("numInputBatches")
- // This avoids calling `output` in the RDD closure, so that we don't need
to include the entire
- // plan (this) in the closure.
- val localOutput = this.output
- child.executeColumnar().mapPartitionsInternal { batches =>
- val toUnsafe = UnsafeProjection.create(localOutput, localOutput)
- batches.flatMap { batch =>
- numInputBatches += 1
- numOutputRows += batch.numRows()
- batch.rowIterator().asScala.map(toUnsafe)
+ val evaluatorFactory =
+ new ColumnarToRowEvaluatorFactory(
+ child.output,
+ numOutputRows,
+ numInputBatches)
+
+ if (conf.usePartitionEvaluator) {
+ child.executeColumnar().mapPartitionsWithEvaluator(evaluatorFactory)
+ } else {
+ child.executeColumnar().mapPartitionsInternal { batches =>
+ val evaluator = evaluatorFactory.createEvaluator()
+ evaluator.eval(0, batches)
Review Comment:
Do we not need to pass the real partition index here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]