Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/20619#discussion_r168910218
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
---
@@ -414,16 +417,16 @@ class ParquetFileFormat
} else {
new ParquetRecordReader[UnsafeRow](new
ParquetReadSupport(convertTz))
}
+ val recordReaderIterator = new RecordReaderIterator(reader)
+ // Register a task completion listener before `initialization`.
+ taskContext.foreach(_.addTaskCompletionListener(_ =>
recordReaderIterator.close()))
reader.initialize(split, hadoopAttemptContext)
- reader
+ recordReaderIterator
}
- val iter = new RecordReaderIterator(parquetReader)
- taskContext.foreach(_.addTaskCompletionListener(_ => iter.close()))
// UnsafeRowParquetRecordReader appends the columns internally to
avoid another copy.
- if (parquetReader.isInstanceOf[VectorizedParquetRecordReader] &&
- enableVectorizedReader) {
+ if (enableVectorizedReader) {
--- End diff --
yea it seems more reasonable to merge this if-else now.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]