Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20894#discussion_r188546460
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala
---
@@ -287,27 +292,18 @@ private[csv] object UnivocityParser {
*/
def parseIterator(
lines: Iterator[String],
- shouldDropHeader: Boolean,
parser: UnivocityParser,
schema: StructType): Iterator[InternalRow] = {
val options = parser.options
- val linesWithoutHeader = if (shouldDropHeader) {
- // Note that if there are only comments in the first block, the header would probably
- // be not dropped.
- CSVUtils.dropHeaderLine(lines, options)
- } else {
- lines
- }
-
- val filteredLines: Iterator[String] =
- CSVUtils.filterCommentAndEmpty(linesWithoutHeader, options)
+ val filteredLines: Iterator[String] = CSVUtils.filterCommentAndEmpty(lines, options)
val safeParser = new FailureSafeParser[String](
input => Seq(parser.parse(input)),
parser.options.parseMode,
schema,
parser.options.columnNameOfCorruptRecord)
+
--- End diff --
I'd revert this change.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]