Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21657#discussion_r199033421 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala --- @@ -131,20 +132,30 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { ) } val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning (file: PartitionedFile) => { val conf = broadcastedHadoopConf.value.value val parser = new UnivocityParser( StructType(dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)), StructType(requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)), parsedOptions) - CSVDataSource(parsedOptions).readFile( + val inputRows = CSVDataSource(parsedOptions).readFile( conf, file, parser, requiredSchema, dataSchema, caseSensitive) + + if (columnPruning) { + inputRows + } else { + val inputAttrs = dataSchema.toAttributes --- End diff -- IIUC, before the column pruning PR, `UnivocityParser` always returned only the required columns, so we don't need to project them in `CSVFileFormat`. cc: @HyukjinKwon https://github.com/apache/spark/blob/e3de6ab30d52890eb08578e55eb4a5d2b4e7aa35/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala#L50
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org