Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20894#discussion_r190137382
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala
---
@@ -248,28 +248,32 @@ private[csv] object UnivocityParser {
*/
def parseStream(
inputStream: InputStream,
- shouldDropHeader: Boolean,
+ dropFirstRecord: Boolean,
parser: UnivocityParser,
- schema: StructType): Iterator[InternalRow] = {
+ schema: StructType,
+ checkFirstRecord: Array[String] => Unit): Iterator[InternalRow] = {
val tokenizer = parser.tokenizer
val safeParser = new FailureSafeParser[Array[String]](
input => Seq(parser.convert(input)),
parser.options.parseMode,
schema,
parser.options.columnNameOfCorruptRecord)
- convertStream(inputStream, shouldDropHeader, tokenizer) { tokens =>
+ convertStream(inputStream, dropFirstRecord, tokenizer,
checkFirstRecord) { tokens =>
safeParser.parse(tokens)
}.flatten
}
private def convertStream[T](
inputStream: InputStream,
- shouldDropHeader: Boolean,
--- End diff --
BTW, why did we rename this variable?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org