Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/20894#discussion_r191819710
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala ---
@@ -130,14 +130,16 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
           "df.filter($\"_corrupt_record\".isNotNull).count()."
       )
     }
+    val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
 
     (file: PartitionedFile) => {
       val conf = broadcastedHadoopConf.value.value
       val parser = new UnivocityParser(
         StructType(dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)),
         StructType(requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)),
         parsedOptions)
-      CSVDataSource(parsedOptions).readFile(conf, file, parser, requiredSchema)
+      CSVDataSource(parsedOptions).readFile(conf, file, parser, requiredSchema, dataSchema,
+        caseSensitive)
--- End diff ---
Nit: the same here.
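
For context on the change under review: `caseSensitiveAnalysis` reflects the session's `spark.sql.caseSensitive` setting, and threading it into `readFile` lets the CSV reader decide how strictly to match required column names against the data schema. Below is a minimal, self-contained sketch of that kind of flag-driven name resolution; the object name and `resolve` helper are hypothetical illustrations, not Spark's actual API.

    object CaseSensitiveResolutionSketch {
      // Index of `wanted` within `headers`, honoring the case-sensitivity flag.
      // (Hypothetical helper; Spark's real resolution logic lives elsewhere.)
      def resolve(headers: Seq[String], wanted: String, caseSensitive: Boolean): Option[Int] = {
        val idx =
          if (caseSensitive) headers.indexOf(wanted)
          else headers.indexWhere(_.equalsIgnoreCase(wanted))
        if (idx >= 0) Some(idx) else None
      }

      def main(args: Array[String]): Unit = {
        val headers = Seq("Id", "Name", "Amount")
        // Case-insensitive (Spark's default): "id" matches "Id".
        println(resolve(headers, "id", caseSensitive = false)) // Some(0)
        // Case-sensitive: "id" no longer matches "Id".
        println(resolve(headers, "id", caseSensitive = true))  // None
      }
    }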
---