Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/20415#discussion_r165188943
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala ---
@@ -326,22 +325,23 @@ case class FileSourceScanExec(
// 2) the number of columns should be smaller than spark.sql.codegen.maxFields
WholeStageCodegenExec(this).execute()
} else {
- val unsafeRows = {
- val scan = inputRDD
- if (needsUnsafeRowConversion) {
- scan.mapPartitionsWithIndexInternal { (index, iter) =>
- val proj = UnsafeProjection.create(schema)
- proj.initialize(index)
- iter.map(proj)
- }
- } else {
- scan
- }
- }
val numOutputRows = longMetric("numOutputRows")
- unsafeRows.map { r =>
- numOutputRows += 1
- r
+
+ val scan = inputRDD
--- End diff ---
nit: I don't think this val is needed; we can just use `inputRDD` directly.
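
For reference, a sketch of how the non-codegen branch might read with the temporary val inlined. This is only an illustration, not the final patch; names like `inputRDD`, `schema`, `needsUnsafeRowConversion` and `longMetric` are assumed from the surrounding `FileSourceScanExec`:

```scala
val numOutputRows = longMetric("numOutputRows")

if (needsUnsafeRowConversion) {
  // Project each row to UnsafeRow and bump the metric in a single pass,
  // using inputRDD directly instead of binding it to a local val.
  inputRDD.mapPartitionsWithIndexInternal { (index, iter) =>
    val proj = UnsafeProjection.create(schema)
    proj.initialize(index)
    iter.map { row =>
      numOutputRows += 1
      proj(row)
    }
  }
} else {
  // Rows are already UnsafeRows; only count them.
  inputRDD.map { row =>
    numOutputRows += 1
    row
  }
}
```

Inlining avoids the extra name without changing behavior, since the val is read exactly once.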
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]