Github user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/20894#discussion_r188635693
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala
---
@@ -236,38 +236,44 @@ private[csv] object UnivocityParser {
*/
def tokenizeStream(
inputStream: InputStream,
- shouldDropHeader: Boolean,
+ dropFirstRecord: Boolean,
+ checkFirstRecord: Array[String] => Unit,
tokenizer: CsvParser): Iterator[Array[String]] = {
- convertStream(inputStream, shouldDropHeader, tokenizer)(tokens => tokens)
+ convertStream(inputStream, dropFirstRecord, tokenizer, checkFirstRecord)(tokens => tokens)
}
/**
* Parses a stream that contains CSV strings and turns it into an iterator of rows.
*/
def parseStream(
inputStream: InputStream,
- shouldDropHeader: Boolean,
+ dropFirstRecord: Boolean,
parser: UnivocityParser,
- schema: StructType): Iterator[InternalRow] = {
+ schema: StructType,
+ filePath: String,
--- End diff --
removed
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]