[GitHub] spark pull request #20894: [SPARK-23786][SQL] Checking column names of csv h...

HyukjinKwon Wed, 16 May 2018 01:48:24 -0700

Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20894#discussion_r188546460
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala ---
    @@ -287,27 +292,18 @@ private[csv] object UnivocityParser {
        */
       def parseIterator(
           lines: Iterator[String],
    -      shouldDropHeader: Boolean,
           parser: UnivocityParser,
           schema: StructType): Iterator[InternalRow] = {
         val options = parser.options
     
    -    val linesWithoutHeader = if (shouldDropHeader) {
    -      // Note that if there are only comments in the first block, the header would probably
    -      // be not dropped.
    -      CSVUtils.dropHeaderLine(lines, options)
    -    } else {
    -      lines
    -    }
    -
    -    val filteredLines: Iterator[String] =
    -      CSVUtils.filterCommentAndEmpty(linesWithoutHeader, options)
    +    val filteredLines: Iterator[String] = CSVUtils.filterCommentAndEmpty(lines, options)
     
         val safeParser = new FailureSafeParser[String](
           input => Seq(parser.parse(input)),
           parser.options.parseMode,
           schema,
           parser.options.columnNameOfCorruptRecord)
    +
    --- End diff --
    
    I'd revert this.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #20894: [SPARK-23786][SQL] Checking column names of csv h...

Reply via email to