Github user MaxGekk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20894#discussion_r176949718
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala
 ---
    @@ -289,27 +294,52 @@ private[csv] object UnivocityParser {
        */
       def parseIterator(
           lines: Iterator[String],
    -      shouldDropHeader: Boolean,
           parser: UnivocityParser,
           schema: StructType): Iterator[InternalRow] = {
         val options = parser.options
     
    -    val linesWithoutHeader = if (shouldDropHeader) {
    -      // Note that if there are only comments in the first block, the 
header would probably
    -      // be not dropped.
    -      CSVUtils.dropHeaderLine(lines, options)
    -    } else {
    -      lines
    -    }
    -
         val filteredLines: Iterator[String] =
    -      CSVUtils.filterCommentAndEmpty(linesWithoutHeader, options)
    +      CSVUtils.filterCommentAndEmpty(lines, options)
     
         val safeParser = new FailureSafeParser[String](
           input => Seq(parser.parse(input)),
           parser.options.parseMode,
           schema,
           parser.options.columnNameOfCorruptRecord)
    +
         filteredLines.flatMap(safeParser.parse)
       }
    +
    +  def checkHeaderColumnNames(
    +    parser: UnivocityParser,
    +    schema: StructType,
    +    columnNames: Array[String],
    +    fileName: String
    +  ): Unit = {
    +    if (parser.options.checkHeader && columnNames != null) {
    +      val fieldNames = schema.map(_.name)
    +      val isMatched = fieldNames.zip(columnNames).forall { pair =>
    +        val (nameInSchema, nameInHeader) = pair
    +        nameInSchema == nameInHeader
    --- End diff --
    
    We do not declare the case sensitivity of CSV inputs in our docs. Also, I have 
not found an explicit statement about case sensitivity in CSV descriptions. It 
seems it is up to implementations how to handle such cases. For example, Apache 
Commons CSV allows configuring this behavior: 
https://commons.apache.org/proper/commons-csv/apidocs/index.html . 


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to